#!/usr/local/bin/perl 
use utf8;

# CS 8995 - Corpus Based Natural Language Processing

# Spring 2001

# Dr. Ted Pedersen

# Final Project: Empirical Methods for Multilingual Text
#####

# Stage 2

# Team Morelia

#   Srinivas Vadrevu
#   Deepa Krishnamoorthy
#   Jayantha Nath

#   EVALUATION PROGRAM (TAILORED TO STAGE 2)

# USAGE : $ perl morelia-eval.pl gold.utx output.utx
# ( gold.utx is the original gold data and output.utx is the output from
#  Gale-Church or the Baseline program )


# Main program: read the gold file and the aligner output, score every
# article, and print the average precision, recall and accuracy.

$file1 = shift;
$file2 = shift;

# Both file names are required; stop with the usage line instead of
# failing later on an empty article list.
die "USAGE: perl morelia-eval.pl gold.utx output.utx\n"
    unless defined $file1 and defined $file2;

# Type tags understood by Readfile and CreateSenArrays.
$GOLD = 0;
$ALIGNED = 1;

# Readfile stores the article text directly in the package arrays
# @garticles1/@garticles2 (gold) and @aarticles1/@aarticles2 (aligned),
# so it is called purely for that side effect.
Readfile( $file1, $GOLD );
Readfile( $file2, $ALIGNED );

die "morelia-eval: no articles found in '$file1'\n" unless @garticles1;

foreach my $x ( 0 .. $#garticles1 )
{
    # CreateSenArrays records alignments in the package arrays @Gold and
    # @Aligned; clear them so entries from the previous article cannot
    # leak into this one.
    @Gold = ();
    @Aligned = ();

    # Cumulative sentence lengths for both sides of the gold and the
    # aligned version of this article.
    my @gsentlens1 = CreateSenArrays( $garticles1[$x], $GOLD );
    my @gsentlens2 = CreateSenArrays1( $garticles2[$x] );
    my @asentlens1 = CreateSenArrays( $aarticles1[$x], $ALIGNED );
    my @asentlens2 = CreateSenArrays1( $aarticles2[$x] );

    # First-side mappings.  The second count must describe @asentlens1,
    # the array that is actually passed.
    my ( $e_gold_count, $e_align_count, @e_maps ) =
        CreateMappings( scalar @gsentlens1, scalar @asentlens1,
                        @gsentlens1, @asentlens1 );
    my @GoldEMappings  = map { $e_maps[$_] } 0 .. $e_gold_count - 1;
    my @AlignEMappings = map { $e_maps[$_] }
                         $e_gold_count .. $e_gold_count + $e_align_count - 1;

    # Second-side mappings.
    my ( $o_gold_count, $o_align_count, @o_maps ) =
        CreateMappings( scalar @gsentlens2, scalar @asentlens2,
                        @gsentlens2, @asentlens2 );
    my @GoldOMappings  = map { $o_maps[$_] } 0 .. $o_gold_count - 1;
    my @AlignOMappings = map { $o_maps[$_] }
                         $o_gold_count .. $o_gold_count + $o_align_count - 1;

    # Pair lists for the gold alignment and for the aligner output.
    my @gold_pairs = CreateFinalArrays(
        scalar @Gold, scalar @GoldEMappings, scalar @GoldOMappings,
        @Gold, @GoldEMappings, @GoldOMappings );
    my @aligned_pairs = CreateFinalArrays(
        scalar @Aligned, scalar @AlignEMappings, scalar @AlignOMappings,
        @Aligned, @AlignEMappings, @AlignOMappings );

    my @common = Intersection( scalar @gold_pairs, scalar @aligned_pairs,
                               @gold_pairs, @aligned_pairs );

    my $SampleSpace = SSpace( $garticles1[$x], $garticles2[$x] );

    my $tp            = @common;
    my $gold_total    = @gold_pairs;
    my $aligned_total = @aligned_pairs;

    # Guard every ratio: an article without pairs scores 0 instead of
    # aborting the run with a division by zero.
    $recall[$x]    = $gold_total    ? $tp / $gold_total    : 0;
    $precision[$x] = $aligned_total ? $tp / $aligned_total : 0;

    my $fn = $gold_total - $tp;       # gold pairs the aligner missed
    my $fp = $aligned_total - $tp;    # aligner pairs not in the gold data
    my $tn = $SampleSpace - $fp - $fn - $tp;

    $accuracy[$x] = $SampleSpace ? ( $tp + $tn ) / $SampleSpace : 0;
}


# Average the per-article scores.
$recallsum = $precisionsum = $accuracysum = 0;
foreach my $x ( 0 .. $#garticles1 )
{
    $recallsum += $recall[$x];
    $precisionsum += $precision[$x];
    $accuracysum += $accuracy[$x];
}

$frecall = $recallsum / @garticles1;
$fprecision = $precisionsum / @garticles1;
$faccuracy = $accuracysum / @garticles1;

print "\n$fprecision $frecall $faccuracy\n";


#---------------End of Main----------------------------------------#


#---------------Readfile-------------------------------------------#

# Readfile( $filename, $type )
#
# Reads a bitext file and stores the text of every article in package
# arrays.  Opening <article ...> tags alternate between the two sides of
# the bitext: the 1st, 3rd, ... article goes to the "1" array and the
# 2nd, 4th, ... to the "2" array.
#
#   $type == $GOLD     fills @garticles1 / @garticles2
#   $type == $ALIGNED  fills @aarticles1 / @aarticles2
#
# The tag lines themselves are kept in @bitexttags and @articletags.
# Dies when the file cannot be opened.  Returns nothing; all results are
# delivered through the package arrays.
sub Readfile
{
    my $filelocal = shift;
    my $flagtype = shift;

    # Choose the destination arrays once instead of testing the type on
    # every line.  An unknown type is read and discarded.
    my ( $first, $second ) =
          $flagtype == $GOLD    ? ( \@garticles1, \@garticles2 )
        : $flagtype == $ALIGNED ? ( \@aarticles1, \@aarticles2 )
        :                         ( [], [] );

    open( my $fh, '<', $filelocal )
        or die "Readfile: cannot open '$filelocal': $!\n";

    my ( $atagcount, $btagcount ) = ( 0, 0 );
    my ( $artcount1, $artcount2 ) = ( 0, 0 );
    my $articleflag = 0;    # 0 = outside an article, 1 / 2 = inside side 1 / 2
    my $flag = 0;           # 0 = next <article> opens side 1, 1 = side 2

    while ( my $line = <$fh> )
    {
        if ( $line =~ /<bitext.*>/ or $line =~ /<\/bitext>/ )
        {
            $bitexttags[$btagcount++] = $line;
            next;
        }
        if ( $line =~ /<article.*>/ )
        {
            $articletags[$atagcount++] = $line;
            if ( $flag == 0 )
            {
                $articleflag = 1;
                $first->[$artcount1] = "";
                $flag = 1;
            }
            else
            {
                $articleflag = 2;
                $second->[$artcount2] = "";
                $flag = 0;
            }
            next;
        }
        if ( $line =~ /<\/article>/ )
        {
            $artcount1++ if $articleflag == 1;
            $artcount2++ if $articleflag == 2;
            $articleflag = 0;
            $articletags[$atagcount++] = $line;
            next;
        }

        # Ordinary line: append it to the article currently open, if any.
        if ( $articleflag == 1 )
        {
            $first->[$artcount1] .= $line;
        }
        elsif ( $articleflag == 2 )
        {
            $second->[$artcount2] .= $line;
        }
    }

    close $fh;
    return;
}   # End of Readfile sub routine


#---------------------------------------------------------------------------------#	


# CreateSenArrays( $article_text, $type )
#
# Scans the article for lines starting with "<alignment INDEX=MAPPING>"
# and records MAPPING under INDEX in the package array @Gold (when
# $type == $GOLD) or @Aligned (when $type == $ALIGNED).  The table for
# the given type is emptied first, so alignments from a previously
# processed article cannot survive into this one.
#
# Returns the cumulative sentence lengths of the article, exactly as
# produced by CreateSenArrays1.
sub CreateSenArrays
{
    my $lstring = shift;
    my $typeflag = shift;

    @Gold    = () if $typeflag == $GOLD;
    @Aligned = () if $typeflag == $ALIGNED;

    foreach my $line ( split /\n/, $lstring )
    {
        next unless $line =~ m/^<alignment (.*)=(.*?)>/;
        my ( $index, $mapping ) = ( $1, $2 );

        $Gold[$index]    = $mapping if $typeflag == $GOLD;
        $Aligned[$index] = $mapping if $typeflag == $ALIGNED;
    }

    my @larray1 = CreateSenArrays1($lstring);

    return @larray1;
}
    


# CreateSenArrays1( $article_text )
#
# Removes the opening <alignment ...> tags and all newlines, splits the
# remaining text at </alignment>, trims each piece, and returns the
# running total of the piece lengths (measured with leng).  The returned
# list starts with 0, so element k is the length of the first k pieces.
sub CreateSenArrays1
{
    my $lstring = shift;
    my @sentlens = (0);
    my $count = 0;

    # Non-greedy on purpose: a greedy ".*>" would also delete sentence
    # text (and a closing tag) that shares the line with the opening tag.
    $lstring =~ s/<alignment.*?>//g;
    $lstring =~ s/\n//g;

    my @sents = split( /<\/alignment>/, $lstring );

    foreach my $sentence (@sents)
    {
        $sentence =~ s/\s+$//;
        $sentence =~ s/^\s+//;
        $count += leng($sentence);
        push @sentlens, $count;
    }

    return @sentlens;
}
    
# CreateMappings( $len1, $len2, @list1, @list2 )
#
# The two lists arrive flattened behind their lengths; each holds
# cumulative sentence lengths.  The lists are first combined position by
# position into one list of boundaries (@map); then, for each input
# list, every boundary is looked up in @map with findpos and the
# positions passed on the way are written out as "N " strings.
#
# Returns ( count1, count2, @strings_for_list1, @strings_for_list2 ).
#
# All working arrays are private to the call, so nothing is carried over
# from an earlier call.
sub CreateMappings
{
    my ( $lensents1, $lensents2, @larray ) = @_;

    my @sents1 = map { $larray[$_] } 0 .. $lensents1 - 1;
    my @sents2 = map { $larray[$_] }
                 $lensents1 .. $lensents1 + $lensents2 - 1;

    # Combine the two lists pairwise: equal values once, otherwise the
    # smaller value before the larger one.
    my @map;
    foreach my $k ( 0 .. $#sents1 )
    {
        my ( $one, $two ) = ( $sents1[$k], $sents2[$k] );
        if ( $one == $two )
        {
            push @map, $one;
        }
        elsif ( $one < $two )
        {
            push @map, $one, $two;
        }
        elsif ( $two < $one )
        {
            push @map, $two, $one;
        }
    }

    # When the second list is longer than the first, carry over its
    # remaining values.
    # NOTE(review): the last value of the second list is never copied
    # (the loop stops before $#sents2) -- kept as found; confirm whether
    # that is intended.
    foreach my $k ( scalar(@sents1) .. $#sents2 - 1 )
    {
        push @map, $sents2[$k];
    }

    my @map1 = _segment_lists( \@sents1, \@map );
    my @map2 = _segment_lists( \@sents2, \@map );

    return ( scalar @map1, scalar @map2, @map1, @map2 );
}

# Private helper of CreateMappings: for every boundary in @$sents except
# the last one, find its position in @$map and emit one "N " string per
# position passed since the previous boundary.
sub _segment_lists
{
    my ( $sents, $map ) = @_;
    my @lists;
    my $prevmatch = 0;
    my $count = 0;

    # NOTE(review): the last boundary is skipped ($s < last index), and
    # the slot counter advances once per position rather than once per
    # boundary, so every string ends up holding a single number.  Both
    # are kept as found; confirm the intended grouping before changing.
    for ( my $s = 0; $s < $#{$sents}; $s++ )
    {
        my $position = findpos( $sents->[$s], @{$map} );
        $lists[$count] = "";
        my $seg;
        for ( $seg = $prevmatch; $seg <= $position; $seg++ )
        {
            $lists[$count++] .= $seg . " ";
        }
        $prevmatch = $seg;
    }

    return @lists;
}


# CreateFinalArrays( $n_gold, $n_e, $n_o, @gold, @egold, @ogold )
#
# The three lists arrive flattened behind their lengths.  For index i,
# $egold[i] is a space-separated list of numbers and $gold[i] is a
# space-separated list of indexes into @ogold.  Every number in
# $egold[i] is paired (via merge) with every number in each referenced
# @ogold entry.
#
# Returns the flat list of "E O" pair strings; the list is empty when
# nothing could be paired.
sub CreateFinalArrays
{
    my ( $lengold, $lenegold, $lenogold, @larray ) = @_;

    my @gold  = map { $larray[$_] } 0 .. $lengold - 1;
    my @egold = map { $larray[$_] } $lengold .. $lengold + $lenegold - 1;
    my @ogold = map { $larray[$_] }
                $lengold + $lenegold .. $lengold + $lenegold + $lenogold - 1;

    # Start from a truly empty list.  Seeding it with undef would add a
    # phantom pair that compares equal to the phantom pair of any other
    # result list.
    my @Alocal;

    # NOTE(review): the last entry of @egold is skipped ($i < last
    # index) -- kept as found; confirm whether that is intended.
    for ( my $i = 0; $i < $#egold; $i++ )
    {
        my @Earr = split( / /, $egold[$i] );
        my @Garr = split( / /, $gold[$i] );

        foreach my $target (@Garr)
        {
            my @Oarr = split( / /, $ogold[$target] );
            push @Alocal,
                merge( scalar @Earr, scalar @Oarr, @Earr, @Oarr );
        }
    }

    return @Alocal;
}


# Intersection( $len1, $len2, @list1, @list2 )
#
# The two lists arrive flattened behind their lengths.  Returns, in
# their original order, the elements of the first list that also occur
# (string equality) in the second list.  An element repeated in the
# first list is returned once per occurrence.
sub Intersection
{
    my ( $lenarray1, $lenarray2, @larray ) = @_;

    my @array1 = map { $larray[$_] } 0 .. $lenarray1 - 1;

    # Index the second list by string value; undef is filed under the
    # empty string, which is how "eq" treats it as well.
    my %in_second;
    foreach my $idx ( $lenarray1 .. $lenarray1 + $lenarray2 - 1 )
    {
        my $value = $larray[$idx];
        $in_second{ defined $value ? $value : '' } = 1;
    }

    my @farray = grep { $in_second{ defined $_ ? $_ : '' } } @array1;

    return @farray;
}

# SSpace( $text1, $text2 )
#
# Size of the sample space for one article pair: the number of
# characters left in each text once surrounding whitespace, newlines and
# the alignment tags are removed, multiplied together.
sub SSpace
{
    my ( $string1, $string2 ) = @_;

    # Same clean-up for both texts, applied in the same order.
    my $strip = sub {
        my $text = shift;
        $text =~ s/\s+$//;
        $text =~ s/^\s+//;
        $text =~ s/\n//g;
        $text =~ s/<alignment.*?>//g;
        $text =~ s/<\/alignment>//g;
        return $text;
    };

    my $len1 = length( $strip->($string1) );
    my $len2 = leng( $strip->($string2) );

    return $len1 * $len2;
}
    

    


# leng( $string )
#
# Number of characters in the string that "." matches, i.e. every
# character except a newline.
sub leng
{
    my $lstring = shift;

    # A list assignment evaluated in scalar context yields the number of
    # matches found.
    my $numchar = () = $lstring =~ m/./g;

    return $numchar;
}

# findpos( $value, @list )
#
# Returns the index n for which $list[n-1] < $value <= $list[n], looking
# at every index from 1 up to and including the last one.  A value of 0
# is always reported at position 0.  When several indexes qualify (which
# can only happen if the list is not in ascending order) the first one
# wins.  Returns undef when no index qualifies.
sub findpos
{
    my ( $var, @givenarray ) = @_;
    my $pos;

    if ( $var == 0 )
    {   return 0;   }

    foreach my $n ( 1 .. $#givenarray )
    {
        if ( $var > $givenarray[$n-1] && $var <= $givenarray[$n] )
        {
            $pos = $n;
            last;
        }
    }

    return $pos;
}


# merge( $len1, $len2, @list1, @list2 )
#
# The two lists arrive flattened behind their lengths.  Returns every
# combination "A B" of an element A from the first list with an element
# B from the second, ordered by A first and then by B.
sub merge
{
    my ( $lenarray1, $lenarray2, @larray ) = @_;

    my @array1 = map { $larray[$_] } 0 .. $lenarray1 - 1;
    my @array2 = map { $larray[$_] }
                 $lenarray1 .. $lenarray1 + $lenarray2 - 1;

    my @farray;
    foreach my $left (@array1)
    {
        foreach my $right (@array2)
        {
            push @farray, $left." ".$right;
        }
    }

    return @farray;
}
