#!/usr/local/bin/perl -w
use utf8;
# Scores a sentence-alignment file against a gold-standard file.
#
# Usage: perl <this script> GOLD_FILE OUTPUT_FILE
#   file1 is the gold file.
#   file2 is the output from the program.
# The F-measure computed by evaluate() is printed on standard output.

die "Usage: $0 gold_file output_file\n" if @ARGV < 2;

my $file1 = shift;
my $file2 = shift;

# Alignment maps and sentence-length tables filled in by build_array().
# They start out empty; "@x = undef" would create a one-element list
# holding undef rather than an empty array.
my @Ar     = ();    # alignment map read from the gold file
my @A      = ();    # alignment map read from the program output
my @len_ar = ();    # sentence lengths read from the gold file
my @len_a  = ();    # sentence lengths read from the program output

# The last argument selects the row of the global @MAX that build_array()
# fills: row 0 for the gold file, row 1 for the program output.
build_array($file1, \@Ar, \@len_ar, 0);
build_array($file2, \@A,  \@len_a,  1);

# Debugging aid: dump both maps side by side.
#  for my $i (0 .. $#Ar) {
#      print "{ $i , Maps = GOLD: $Ar[$i][1], OURS: $A[$i][1], length = $len_ar[1][$i] $len_ar[0][$i] }\n";
#  }
#  print "MAxis : $MAX[0][0], $MAX[0][1] \n";

# Only the gold file's sentence lengths are used for weighting.
evaluate(\@Ar, \@A, \@len_ar);


###############
# evaluate : Compares two alignment maps and prints the length-weighted
# F-measure of the second against the first, using the formula:
# F = 2*precision*recall/(recall + precision).
#
# Arguments:
#   $gold - array ref, the gold map.  Row i is
#           [ count, target index 1, ..., target index count ].
#   $ours - array ref, the map being scored, in the same layout.
#   $len  - array ref of sentence lengths: $len->[0][i] is the length of
#           source sentence i and $len->[1][k] that of target sentence k.
#
# Reads the global @MAX: $MAX[0][0] and $MAX[1][0] give the number of
# leading rows of $gold and $ours, respectively, that are scored.
#
# A link i -> k weighs $len->[0][i] * $len->[1][k].  Recall is the weight
# of the gold links also present in $ours divided by the weight of all
# gold links; precision divides the same numerator by the weight of all
# links in $ours.
#
# Prints F followed by a newline and also returns it.  A ratio whose
# denominator is zero (a map without links, or no common link) counts as
# 0 instead of aborting with "Illegal division by zero".
###############

sub evaluate {
    my ($gold, $ours, $len) = @_;

    my $intersection = 0;
    my $total_gold   = 0;
    my $total_ours   = 0;

    my $gold_rows = $MAX[0][0] || 0;
    my $ours_rows = $MAX[1][0] || 0;

    # Weight of every gold link, and of those gold links that the scored
    # map proposes for the same source sentence.
    for my $i (0 .. $gold_rows - 1) {
        my $gold_count = $gold->[$i][0] || 0;
        my $ours_count = $ours->[$i][0] || 0;

        for my $j (1 .. $gold_count) {
            my $target = $gold->[$i][$j];
            my $weight = $len->[0][$i] * $len->[1][$target];
            $total_gold += $weight;

            # Numeric comparison, so "1" and 1 name the same sentence.
            $intersection += $weight
                if grep { $ours->[$i][$_] == $target } 1 .. $ours_count;
        }
    }

    # Weight of every link in the scored map.
    for my $i (0 .. $ours_rows - 1) {
        my $ours_count = $ours->[$i][0] || 0;
        for my $j (1 .. $ours_count) {
            $total_ours += $len->[0][$i] * $len->[1][ $ours->[$i][$j] ];
        }
    }

    my $recall    = $total_gold ? $intersection / $total_gold : 0;
    my $precision = $total_ours ? $intersection / $total_ours : 0;
#    print "\n $recall \t $precision \t $intersection \n";

    my $F = ($recall + $precision)
          ? (2 * $recall * $precision) / ($recall + $precision)
          : 0;
    print "$F\n";
    return $F;
}


# build_array : Parses one alignment file and builds the A or Ar array.
#
# Arguments:
#   $file    - path of the alignment file to read.
#   $map     - array ref to fill.  Row i describes the i-th "<alignment "
#              tag of the file, counted from 0 across all articles, as
#              [ number of targets, target index, ... ]; every target
#              index is the number written in the tag minus one.
#   $lengths - array ref to fill.  $lengths->[L][s] is the length of
#              sentence s (0-based within its article) of article L
#              (0-based), measured after deleting literal "\n" sequences
#              and all whitespace.
#   $max_ind - row of the global @MAX to fill: $MAX[$max_ind][L] is set to
#              the number of alignments in article L when its "</article>"
#              line is read.
#
# Recognised input, one construct per line:
#   <article language=...      opens an article
#   <alignment N=T1 T2 ...>    opens a sentence; the targets are the
#                              whitespace-separated tokens between "="
#                              and ">".  A tag without "=" has no targets.
#                              Any text on the tag's own line is not
#                              counted as sentence text.
#   ...</alignment>            closes the sentence; text before the tag on
#                              that line still belongs to the sentence
#   </article>                 closes the article
#
# Dies if $file cannot be opened.
#
# NOTE(review): the file is read without an encoding layer, so lengths are
# presumably byte counts rather than character counts - confirm which one
# the metric intends before changing it.
sub build_array {
    my ($file, $map, $lengths, $max_ind) = @_;

    open(my $fh, '<', $file) or die "Cannot open '$file': $!\n";

    my $in_article   = 0;
    my $in_alignment = 0;
    my $language     = 0;     # number of article tags opened so far
    my $first_index  = -1;    # file-wide index of the latest alignment
    my $base         = 0;     # alignments seen before the current article
    my $sentence     = '';

    while (my $line = <$fh>) {
        if ($in_alignment) {
            my $end = index($line, '</alignment>');
            if ($end < 0) {
                $sentence .= $line;    # sentence continues on the next line
                next;
            }
            $sentence .= substr($line, 0, $end);
            $in_alignment = 0;

            $sentence =~ s/\\n//g;     # literal backslash-n sequences
            $sentence =~ s/\s+//g;
            $lengths->[ $language - 1 ][ $first_index - $base ]
                = length $sentence;
        }
        elsif (!$in_article) {
            if ($line =~ /<article language=/) {    # found tag
                $language++;
                $in_article = 1;
                $base       = $first_index + 1;
            }
        }
        elsif ($line =~ /<\/article>/) {
            $in_article = 0;
            $MAX[$max_ind][ $language - 1 ] = $first_index + 1 - $base;
        }
        elsif ($line =~ /<alignment (.*)/s) {       # found alignment
            my $rest = $1;
            # split ' ' skips leading and trailing whitespace, so the
            # line's newline can never turn into an extra target.
            my @sent_map = $rest =~ /=([^>]*)/ ? split(' ', $1) : ();

            $first_index++;
            $map->[$first_index]
                = [ scalar(@sent_map), map { $_ - 1 } @sent_map ];

            $in_alignment = 1;
            $sentence     = '';
        }
    }

    close($fh);
    return;
}



