#! /usr/bin/perl 
use strict;

use Getopt::Long; 

##########################################################################################
#											 #
#	       Pre-process : dealing with options, reading dictionary			 #
#											 #
##########################################################################################

############################
#Global technical variables#
############################
#name of this script, used as a prefix in the error messages
my $script = "giec-eval_check-format.pl"; 
#usage text, printed for the "h" option and when a mandatory option is missing
my $help = << "end_of_help;";

Usage: giec-eval_check-format.pl -p <predictions-file> -d <dictionary-file>
Options :
	-h	help
	-p	path of the predictions-file
	-d	path of the dictionary
please questioning and reporting bugs at lll05\@jouy.inra.fr

end_of_help;
#options read from the command line by GetOptions (keys "h", "p" and "d")
my %options;

#############################
#Global conceptual variables#
#############################
#names taken from the first field of the dictionary lines : the only names
#accepted for the declared agents and targets
my @ListOfCanonicalForms;
#list returned by readResponses : one hash reference per ID of the predictions file
my @Responses;
#NOTE(review): not referenced anywhere in the visible code - presumably a leftover, to confirm
my %ResponseBlock;


#Dealing with options
#"h" prints the help, "p" (predictions file) and "d" (dictionary) are mandatory.
#Every abnormal end leaves with a non-zero status, so that a calling script
#cannot mistake a wrong command line for a successful check.

#GetOptions returns false when it met an unknown or malformed option
#(it has already written its own diagnostic on STDERR)
GetOptions(\%options,
	   "h",
	   "p=s",
	   "d=s",
	   ) or do {print $help; exit 1;};

#asking for the help is not an error : status 0
if (defined ($options{'h'})) {print $help; exit;}

#mandatory options, checked in the order "p" then "d"
foreach my $mandatoryOption ('p','d') {
	if (not (defined ($options{$mandatoryOption}))) {
		print "Caution : please give \"$mandatoryOption\" option\n";
		print $help;
		exit 1;
		}
	}

#whatever is left in @ARGV after the options is not expected
if ($#ARGV>=0) {die "Error : $script takes no arguments ! \n"};

#Reading Dictionary
#The first field of each line (everything before the first tabulation or end of line)
#is stored as a canonical form in @ListOfCanonicalForms.
#A line whose first field is empty or contains a "%" is ignored
#(NOTE(review): "%" presumably introduces comment lines - to confirm).

#3-argument open with a lexical handle : the path comes from the command line
#and must never be interpreted as an open mode or a pipe
open(my $dictionaryHandle,'<',$options{'d'}) or die "$script: could not open file ($options{'d'}) ! \n";
while (my $dictionaryLine=<$dictionaryHandle>) {
	#inside a character class "|" is a plain character, not an alternation : it is
	#left out so that a name containing a pipe is no longer cut at the pipe.
	#\z accepts a last line that is not ended by an end-of-line character.
	if ($dictionaryLine =~ /^([^%\r\n\t]+)(?:[\r\n\t]|\z)/) {
		push(@ListOfCanonicalForms,$1);
		}
	}
close $dictionaryHandle;


##########################################################################################
#											 #
#	       			       Main program					 #
#											 #
##########################################################################################

#readResponses dies on the first error it meets : reaching the print below means
#that the whole predictions file has been read and checked without any error
@Responses=readResponses($options{'p'});
print "Format checking succeeded without error. Thanks. \n";



##########################################################################################
#											 #
#		              Reading and Checking responses				 #
#											 #
##########################################################################################


sub verifyCoherence{
#Check that the fields read for one ID agree with each other and with the dictionary;
#the first inconsistency found stops the program with a message.
#
#Parameters : $_[0] reference to the response hash (keys 'ID', 'agents', 'targets', 'interactions') ;
#             $_[1] name of the predictions file, only used in the messages.
#Rules      : every declared agent (resp. target) is the agent (resp. target) of at least
#             one interaction and belongs to @ListOfCanonicalForms ;
#             every agent (resp. target) of an interaction is declared.

	my ($responseRef,$fileLabel)=@_;
	my %record=%{$responseRef};#shallow copy : the hash of the caller is only read
	my $errorPrefix="Error in $fileLabel : for ID $record{'ID'}, ";

	#names met in the interactions, for each role, kept in order of first appearance
	my (@agentsInUse,@targetsInUse);
	my (%isAgentInUse,%isTargetInUse);
	for my $link (@{$record{'interactions'}}) {
		my $from=$link->{'agent'};
		my $to=$link->{'target'};
		push (@agentsInUse,$from) unless $isAgentInUse{$from}++;
		push (@targetsInUse,$to) unless $isTargetInUse{$to}++;
		}

	#declared agents : each one must interact, then be a canonical form
	my %isAgentDeclared;
	for my $name (@{$record{'agents'}}) {
		$isAgentInUse{$name}
			or die $errorPrefix."agent $name seems to not interact\n";
		myInclude($name,\@ListOfCanonicalForms)
			or die $errorPrefix."agent $name is not recognized as a valid canonical form\n";
		$isAgentDeclared{$name}=1;
		}

	#declared targets : same two checks
	my %isTargetDeclared;
	for my $name (@{$record{'targets'}}) {
		$isTargetInUse{$name}
			or die $errorPrefix."target $name seems to not interact\n";
		myInclude($name,\@ListOfCanonicalForms)
			or die $errorPrefix."target $name is not recognized as a valid canonical form\n";
		$isTargetDeclared{$name}=1;
		}

	#other way round : whatever interacts must have been declared with the same role
	for my $name (@agentsInUse) {
		$isAgentDeclared{$name}
			or die $errorPrefix."agent $name interacts but is not declared\n";
		}
	for my $name (@targetsInUse) {
		$isTargetDeclared{$name}
			or die $errorPrefix."target $name interacts but is not declared\n";
		}
	}


sub readResponses {
#Read the predictions file, extract the IDs, agents, targets and interactions and check them.
#
#Parameter : $_[0] path of the predictions file.
#Returns   : the list of the responses, one hash reference per ID, with the keys
#            'ID' (string), 'agents' and 'targets' (references to arrays of names) and
#            'interactions' (reference to an array of hashes with keys 'agent' and 'target').
#Dies      : when the file cannot be opened, on the first line that does not follow the
#            expected format, or when verifyCoherence rejects a response.
#
#A response is made of 4 consecutive lines, whose fields are separated by tabulations :
#	ID<TAB>identifier (digits and "-")
#	agents<TAB>agent('name')<TAB>...
#	targets<TAB>target('name')<TAB>...
#	genic_interactions<TAB>genic_interaction('agent','target')<TAB>...

	my $nameFile=$_[0];

	#3-argument open with a lexical handle : the path comes from the command line
	#and must never be interpreted as an open mode or a pipe
	open (my $responsesHandle,'<',$nameFile) or die "$script: could not open file ($nameFile) ! \n";
	my @lines=<$responsesHandle>;#array of lines from response file
	close $responsesHandle;

	#An item ends with a tabulation or an end of line, or with the end of the file when
	#the last line has no end-of-line character (otherwise its last item would be lost).
	#Inside a character class "|" is a plain character, not an alternation : it is left
	#out of every class below so that a pipe is neither an ID character nor a separator.
	my $endOfItem=qr/(?:[\t\r\n]|\z)/;

	#the two messages are kept exactly as they always were (they differ by one space)
	my $dieBadLine=sub {die "$script: format error \($nameFile line ".($_[0]+1)."\) ! \n";};
	my $dieTruncated=sub {die "$script: format error \($nameFile line ".($_[0]+1)."\)! \n";};

	my @list;#list of responses, one per ID
	my $irR=0;#index of the current line in @lines
	while ($irR<=$#lines) {

		#skipping every line (blank or not) until the next one starting with "ID"
		while ($irR<=$#lines and not ($lines[$irR] =~ /^ID/)) {
			$irR++;
			}
		if ($irR>$#lines) {last;}

		my %response;#response is a hash with 1 ID, 1 list of agents, 1 list of targets, and 1 list of interactions

		#reading ID
		if ($lines[$irR] =~ /^ID\t([0-9-]+)(?:[\r\n]|\z)/) {$response{'ID'}=$1;}
		else {$dieBadLine->($irR);}

		#jumping to next line, which must exist
		$irR++;
		if ($irR>$#lines) {$dieTruncated->($irR);}

		#reading agents : the items are removed one by one from the front of the line ;
		#what cannot be read as an item is left on the line and is not checked
		if (not ($lines[$irR] =~ /^agents/)) {$dieBadLine->($irR);}
		my @listAgents;
		while ($lines[$irR] =~ s/^agents\t([^\t\r\n]+)$endOfItem/agents\t/) {
			my $item=$1;
			if ($item =~ /^agent\(\'(.+)\'\)/) {push (@listAgents,$1);}
			else {$dieBadLine->($irR);}
			}
		$response{'agents'}=\@listAgents;

		#jumping to next line, which must exist
		$irR++;
		if ($irR>$#lines) {$dieTruncated->($irR);}

		#reading targets
		if (not ($lines[$irR] =~ /^targets/)) {$dieBadLine->($irR);}
		my @listTargets;
		while ($lines[$irR] =~ s/^targets\t([^\t\r\n]+)$endOfItem/targets\t/) {
			my $item=$1;
			if ($item =~ /^target\(\'(.+)\'\)/) {push (@listTargets,$1);}
			else {$dieBadLine->($irR);}
			}
		$response{'targets'}=\@listTargets;

		#jumping to next line, which must exist
		$irR++;
		if ($irR>$#lines) {$dieTruncated->($irR);}

		#reading interactions
		if (not ($lines[$irR] =~ /^genic_interactions/)) {$dieBadLine->($irR);}
		my @listInteractions;
		while ($lines[$irR] =~ s/^genic_interactions\t([^\t\r\n]+)$endOfItem/genic_interactions\t/) {
			my $item=$1;
			if ($item =~ /^genic_interaction\(\'([^\']+)\',\'(.+)\'\)/) {
				push (@listInteractions,{'agent'=>$1,'target'=>$2});
				}
			else {$dieBadLine->($irR);}
			}
		$response{'interactions'}=\@listInteractions;

		#verifying response's coherence (dies when the response is not coherent)
		verifyCoherence(\%response,$nameFile);

		#saving response in list ; the search for the next ID starts again on the
		#genic_interactions line, which does not start with "ID" and is skipped
		push (@list,\%response);

		}
	return @list;
	}




##########################################################################################
#											 #
#		 		     Little scripts					 #
#											 #
##########################################################################################


sub myInclude{
#Tell whether a string belongs to a list.
#
#Parameters : $_[0] the string searched for ;
#             $_[1] a reference to the array to search in.
#Returns    : 1 if one element of the array is equal to the string (string comparison "eq"),
#             0 if not (in particular for an empty array).

	my ($element,$arrayRef)=@_;

	#the array is walked through its reference : copying it on every call was costly,
	#this routine being called with the whole dictionary for each declared name
	foreach my $candidate (@{$arrayRef}) {
		if ($element eq $candidate) {return 1;}
		}

	return 0;
	}
