#!/usr/local/bin/perl

use utf8;

# Two file names are required on the command line.  The first is loaded into
# @goldArticles (the reference alignment) and the second into @alignArticles
# (the alignment being scored).  Any further arguments are ignored.
die "usage: $0 GOLD_FILE ALIGNED_FILE\n" unless @ARGV >= 2;

@files = @ARGV;

#----------------Divide Files into Articles--------------------#

# CreateArt returns one string per </article>-delimited chunk of the file.
@goldArticles  = CreateArt($files[0]);
@alignArticles = CreateArt($files[1]);

#--------------------------------------------------------------#

# Score the articles two list entries at a time: entry $i is split into the
# English sentences and entry $i + 1 into the foreign sentences, for both the
# gold file and the aligned file.
#
# The sentence arrays, $totalForiegnLength, $goldEnglishElement,
# $alignEnglishElement and $article are package globals because the helper
# subs (FindGoldTrans, FindAlignTrans, CalculateParams) read them directly.
for (my $i = 0; $i <= $#goldArticles; $i += 2) {
    @goldEnglishSentences  = CreateSent($goldArticles[$i]);
    @goldForiegnSentences  = CreateSent($goldArticles[$i + 1]);
    @alignEnglishSentences = CreateSent($alignArticles[$i]);
    @alignForiegnSentences = CreateSent($alignArticles[$i + 1]);

    # Size of the aligned foreign text; CalculateParams subtracts from this
    # to obtain the true-negative count.
    $totalForiegnLength = 0;
    $totalForiegnLength += GetLength($_) for @alignForiegnSentences;

    # Reset the per-article sentence cursors and record which article the
    # helpers should look up alignment tags in.
    $goldEnglishElement  = 0;
    $alignEnglishElement = 0;
    $article             = $i;

    for my $englishSentence (@alignEnglishSentences) {
        my $goldTranslation  = FindGoldTrans($englishSentence);
        my $alignTranslation = FindAlignTrans($englishSentence);

        my ($tp, $fp, $tn, $fn) = CalculateParams($goldTranslation, $alignTranslation);

        $truePositives  += $tp;
        $falsePositives += $fp;
        $trueNegatives  += $tn;
        $falseNegatives += $fn;
    }

    # NOTE(review): the four counters are never reset, so from the second
    # article on these ratios are computed over the running totals of all
    # articles so far, not over this article alone -- confirm that is the
    # intended averaging before changing it.
    my $relevant  = $truePositives + $falseNegatives;
    my $retrieved = $truePositives + $falsePositives;
    my $total     = $truePositives + $falsePositives + $trueNegatives + $falseNegatives;

    # A zero denominator (e.g. an article with no sentences) used to abort
    # the whole run with "Illegal division by zero"; score it as 0 instead.
    my $recall    = $relevant  ? $truePositives / $relevant                  : 0;
    my $precision = $retrieved ? $truePositives / $retrieved                 : 0;
    my $accuracy  = $total     ? ($truePositives + $trueNegatives) / $total  : 0;

    $results[0] += $recall;
    $results[1] += $precision;
    $results[2] += $accuracy;
}



# Average the accumulated per-article scores.  The scoring loop consumes two
# entries of @goldArticles per article pair, hence the factor of 2.
my $articleCount = scalar @goldArticles;

# Without this check an empty or tag-less gold file ends in an unexplained
# "Illegal division by zero".
die "No articles found in the gold file; nothing to evaluate\n" unless $articleCount;

$recall    = 2 * $results[0] / $articleCount;
$precision = 2 * $results[1] / $articleCount;
$accuracy  = 2 * $results[2] / $articleCount;

print "recall = $recall\nprecision = $precision\naccr = $accuracy\n\n"; 

#--------------------------------------------------------------#

# Compare a gold translation with the aligner's translation and return the
# list ($tp, $fp, $tn, $fn) of character counts.
#
# Parameters:
#   $goldTranslation  - foreign text the gold alignment assigns to a sentence
#   $alignTranslation - foreign text the evaluated alignment assigns to it
# Reads the package global $totalForiegnLength to derive $tn.
#
# The overlap between the two strings is looked for in this order, and the
# first case that applies decides the counts:
#   1. the aligned text occurs inside the gold text;
#   2. the gold text occurs inside the aligned text;
#   3. a tail of the aligned text (longest first) is a head of the gold text;
#   4. a tail of the gold text (longest first) is a head of the aligned text;
#   5. no overlap: everything in gold is missed, everything aligned is wrong.
#
# All comparisons are literal (index/substr).  Earlier versions interpolated
# the text into a regex after a hand-written escape that did not cover the
# backslash, so text containing one could mis-match or abort compilation;
# it also recompiled a pattern for every candidate tail.
sub CalculateParams
    {
    my($goldTranslation, $alignTranslation) = @_;

    # Either lookup may have produced nothing; treat that as empty text.
    $goldTranslation  = '' unless defined $goldTranslation;
    $alignTranslation = '' unless defined $alignTranslation;

    my $goldLen  = GetLength($goldTranslation);
    my $alignLen = GetLength($alignTranslation);

    my($tp, $fp, $tn, $fn);
    my $found = 0;
    my $at;

    if(($at = index($goldTranslation, $alignTranslation)) >= 0)
        {
        # Case 1: gold = before . aligned . after
        my $before = substr($goldTranslation, 0, $at);
        my $after  = substr($goldTranslation, $at + length $alignTranslation);

        $tp = $alignLen;
        $fn = GetLength($before) + GetLength($after);
        $fp = 0;
        $tn = $totalForiegnLength - $alignLen - $fn;
        $found = 1;
        }
    elsif(($at = index($alignTranslation, $goldTranslation)) >= 0)
        {
        # Case 2: aligned = before . gold . after
        my $before = substr($alignTranslation, 0, $at);
        my $after  = substr($alignTranslation, $at + length $goldTranslation);

        $tp = $goldLen;
        $fn = 0;
        $fp = GetLength($before) + GetLength($after);
        $tn = $totalForiegnLength - $alignLen;
        $found = 1;
        }
    else
        {
        # Case 3: drop $skip leading characters from the aligned text and
        # see whether what remains starts the gold text.
        for(my $skip = 1; $skip < $alignLen && !$found; $skip++)
            {
            my $tail = substr($alignTranslation, $skip);
            next unless substr($goldTranslation, 0, length $tail) eq $tail;

            $tp = GetLength($tail);
            $fn = GetLength(substr($goldTranslation, length $tail));
            $fp = $skip;
            $tn = $totalForiegnLength - $alignLen - $fn;
            $found = 1;
            }
        }

    if(!$found)
        {
        # Case 4: the mirror image -- a tail of gold starting the aligned text.
        for(my $skip = 1; $skip < $goldLen && !$found; $skip++)
            {
            my $tail = substr($goldTranslation, $skip);
            next unless substr($alignTranslation, 0, length $tail) eq $tail;

            $tp = GetLength($tail);
            $fn = $skip;
            $fp = GetLength(substr($alignTranslation, length $tail));
            $tn = $totalForiegnLength - $alignLen - $fn;
            $found = 1;
            }
        }

    if(!$found)
        {
        # Case 5: nothing in common.
        $tp = 0;
        $fn = $goldLen;
        $fp = $alignLen;
        $tn = $totalForiegnLength - $alignLen - $fn;
        }

    my @params = ($tp, $fp, $tn, $fn);

    return (@params);
    } 

#--------------------------------------------------------------#

# Return the foreign text that the evaluated alignment attaches to the next
# English sentence of the current article.
#
# The sentence is identified by position, not by content: the package global
# $alignEnglishElement is the 0-based cursor, it is turned into a 1-based
# number for the tag lookup in $alignArticles[$article], and it is advanced
# by one on every call.  The foreign sentences named by the tag are
# concatenated from @alignForiegnSentences.
#
# Parameters:
#   $engSent - received from the caller but not used in the body
# Returns the concatenated text, or undef when no foreign sentence applies.
sub FindAlignTrans
    {
    my($engSent) = @_;
    my($targetSent, @targetForiegn);

    @targetForiegn = FindForiegn($alignEnglishElement+1, $alignArticles[$article]);

    foreach my $sentenceNumber (@targetForiegn)
        {
        # Sentence numbers are 1-based.  An empty, zero or non-numeric entry
        # would become subscript -1 and silently pull in the LAST foreign
        # sentence, so such entries are skipped.
        next unless $sentenceNumber >= 1;

        $targetSent .= $alignForiegnSentences[$sentenceNumber-1];
        }

    $alignEnglishElement++;
    return $targetSent;
    }

#--------------------------------------------------------------#

# Estimate the gold foreign text that corresponds to one English sentence of
# the evaluated alignment.
#
# Starting at the package-global cursor $goldEnglishElement, gold English
# sentences are appended one at a time (together with the foreign sentences
# their alignment tags name) until the accumulated English text contains
# $engSent.  The matching slice of the accumulated foreign text is then
# estimated proportionally: the offsets of the match in the English text are
# scaled by (foreign length / English length) and used with substr.
#
# The cursor is left on the matching gold sentence when text remains after
# the match (the next aligned sentence may still be inside it) and moved past
# it otherwise.
#
# Parameters:
#   $engSent - English sentence from the evaluated alignment
# Returns the estimated foreign text, or undef when $engSent cannot be found
# in the remaining gold sentences.  Progress lines are printed to STDOUT.
sub FindGoldTrans
    {
    my($engSent) = @_;
    my($engTargetSent, $foriegnTargetSent, @targetForiegn);
    my($match, $lenMatch, $lenPre, $proportion, $targetSent);

    # Remember where the search started so a failed search can be undone.
    my $startElement = $goldEnglishElement;

    $match = 0;
    $engSent = escape($engSent);

    # Stop at the end of the gold sentences.  Without this bound a sentence
    # that never matches kept the loop running forever.
    while($match == 0 && $goldEnglishElement <= $#goldEnglishSentences)
        {
        $engTargetSent .= $goldEnglishSentences[$goldEnglishElement];

        print"goldEngEle = $goldEnglishElement\n";
        print "<engSent>$engSent\n";
        print "<engTargetSent>$engTargetSent\n\n";

        @targetForiegn = FindForiegn($goldEnglishElement+1, $goldArticles[$article]);
        foreach my $sentenceNumber (@targetForiegn)
            {
            # Skip empty/zero entries: they would index element -1, i.e. the
            # last foreign sentence.
            next unless $sentenceNumber >= 1;

            $foriegnTargetSent .= $goldForiegnSentences[$sentenceNumber-1];
            }

        print "<foriegnTargetSent>$foriegnTargetSent\n\n";

        if($engTargetSent)
            {
            if($engTargetSent =~ /$engSent/)
                {
                print "<matchfound>$&\n\n";

                # Nothing left after the match: this gold sentence is used up.
                if(!$') {$goldEnglishElement++;}
                $match = 1;

                # Scale English offsets to foreign offsets.
                $proportion = (GetLength($foriegnTargetSent)) / length $engTargetSent;
                $lenMatch = int (( length $& ) * $proportion );
                $lenPre = int ( (length $`) * $proportion );

                print "<data>proportion = $proportion, lenMatch = $lenMatch, lenPre = $lenPre\n\n";

                $targetSent = substr $foriegnTargetSent, $lenPre, $lenMatch;

                print "<targetSent>$targetSent\n\n";
                }
            }
        if($match == 0) {$goldEnglishElement++;}
        }

    if($match == 0)
        {
        # Put the cursor back so the following sentences of this article can
        # still be located instead of all failing after this one.
        warn "FindGoldTrans: sentence not found in gold text: $engSent\n";
        $goldEnglishElement = $startElement;
        }

    return $targetSent;
    }


#--------------------------------------------------------------#

# Read an alignment file and return its articles as a list of strings.
#
# <bitext ...> and </bitext> tags are removed line by line, the text is split
# on </article>, and from each piece the first <article ...> tag and any
# trailing whitespace are removed.
#
# Parameters:
#   $file - path of the file to read
# Returns the list of article bodies (empty if the file has no content).
# Dies if no file name is given or the file cannot be opened.
#
# NOTE(review): no encoding layer is applied on open, so unless Perl is run
# with -C / PERL_UNICODE the text is handled as raw bytes and the \x{2028}
# pattern below cannot match an encoded LINE SEPARATOR -- confirm the input
# encoding before relying on it.
sub CreateArt
    {
    my($file) = @_;
    my $string = '';

    die "CreateArt: no file name given\n"
        unless defined $file && length $file;

    # Three-argument open: a name such as ">x" or "cmd|" coming from the
    # command line is treated as a plain file name, never as a mode or pipe.
    open(my $fh, '<', $file)
        or die "CreateArt: cannot open '$file' for reading: $!\n";

    while(my $line = <$fh>)
        {
        $line =~ s/<bitext.*?>//g;
        $line =~ s/<\/bitext>//g;
        $string .= $line;
        }
    close $fh;

    $string =~ s/\s+$//;
    $string =~ s/\x{2028}+$//;

    my @articles = split(/<\/article>/, $string);

    foreach my $body (@articles)
        {
        $body =~ s/<article.*?>//;
        $body =~ s/\s+$//;
        }

    return (@articles);
    }    

#--------------------------------------------------------------#

# Split one article into its sentences.
#
# The article is cut at every </alignment>; each piece then loses its first
# <alignment ...> tag, any run of U+2028 at its start or end, and has its
# whitespace collapsed to single spaces and trimmed at both ends.
#
# Parameters:
#   $article - article text as produced by CreateArt
# Returns the list of cleaned sentences, in document order.
sub CreateSent
    {
    my($article) = @_;

    my @sentences = map {
        my $text = $_;

        $text =~ s/<alignment.*?>//;      # opening tag of this sentence
        $text =~ s/^(\x{2028}+)//g;       # line separators at the front
        $text =~ s/(\x{2028}+)$//g;       # ... and at the back
        $text =~ s/\s+/ /g;               # collapse whitespace runs
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        $text;
    } split(/<\/alignment>/, $article);

    return @sentences;
    }

#--------------------------------------------------------------#

# Look up the alignment tag of one English sentence and return the foreign
# sentence numbers listed in it.
#
# The tag is the first "<alignment N ...>" in $article whose number is
# exactly $eng.  Everything up to and including "N =" and the closing ">" is
# removed; what is left is split on whitespace.
#
# Parameters:
#   $eng     - 1-based English sentence number
#   $article - article text still containing its <alignment ...> tags
# Returns the list of entries found in the tag, or an empty list when the
# article has no tag for $eng.
sub FindForiegn
    {
    my($eng, $article) = @_;

    # Explicit empty return: falling off the end of a failed "if" would hand
    # the caller the false condition value as a one-element list.
    return unless defined $eng && defined $article;

    # (?!\d) keeps sentence 1 from matching the tag of sentence 10, 11, ...
    return unless $article =~ /(<alignment\s*\Q$eng\E(?!\d).*?>)/;

    my $tag = $1;
    $tag =~ s/<alignment\s*\Q$eng\E\s*=//;
    $tag =~ s/>//;

    # split ' ' ignores leading whitespace and treats runs of whitespace as
    # one separator, so "1 = 2 3" and "1=2  3" both give (2, 3) with no
    # empty entries.
    my @foriegn = split(' ', $tag);

    return @foriegn;
    }

#--------------------------------------------------------------#

# Return a copy of $param in which every regex metacharacter is preceded by
# a backslash, so the result can be interpolated into a pattern and match
# the original text literally.
#
# Escaped characters: \ / | ( ) [ ] { } ^ $ * + ? .
# The backslash itself is included; without it, text such as "a\b" would be
# read as a regex escape once interpolated into a pattern.
#
# Parameters:
#   $param - text to escape (undef is treated as the empty string)
# Returns the escaped string.
sub escape 
    {
    my $param = shift;

    return '' unless defined $param;

    (my $escaped = $param) =~ s/([\\\/\|\(\)\[\]\{\}\^\$\*\+\?\.])/\\$1/g;

    return $escaped;
    }

#--------------------------------------------------------------#

# Count the characters of a string that the regex "." matches, i.e. every
# character except newlines.
#
# Parameters:
#   $string - text to measure (undef counts as 0)
# Returns the number of matching characters.
sub GetLength
    {
    my($string) = @_;

    # In list context a /g match returns one element per match; assigning
    # that list to () and the result to a scalar yields the match count.
    my $matched = () = $string =~ /./g;

    return $matched;
    }

#--------------------------------------------------------------#
