#!/usr/local/bin/perl
use utf8;

# Sentence text collected for the bitext currently being read.  processBitext
# appends text seen before the first </article> to the *Eng arrays and text
# seen after it to the *Trans arrays, indexed by sentence number.
@goldEng = ();
@goldTrans = ();

@ourEng = ();
@ourTrans = ();

# Alignment pairs parsed from the alignment tags: each entry is
# [ number left of '=', one number right of '=' ].
@goldLineArray = ();
@ourLineArray = ();

# Markup fragments that the subs below interpolate into their regexes.
$tagOpen = "<";
$tagClose = ">";
$sentStartTag = "alignment";
$sentEndTag = "/alignment";
$space = " ";
$equals = "=";
$articleEnds = "</article>";
$line =0 ;
$bitextStart = "<bitext ";
$bitextEnd = "</bitext>";

$fileGold = $ARGV[0];
$fileOur = $ARGV[1];

# Fail early with a usable message; previously a missing argument or an
# unreadable file only surfaced as "Illegal division by zero" at the end.
unless (defined($fileGold) && defined($fileOur)) {
  die "Usage: $0 <gold-file> <our-file>\n";
}

# Three-argument open so the file name can never be taken as a mode or pipe.
# The bareword handles are kept because readGold/readOur read from them.
open(FILEGOLD, '<', $fileGold) or die "Cannot open gold file '$fileGold': $!\n";
open(FILEOUR, '<', $fileOur) or die "Cannot open our file '$fileOur': $!\n";

# Running sums that process() adds to for every scored sentence.
$total = 0;
$recall = 0;
$precision = 0;
$total_inter = 0;
$total_gold = 0;
$total_our = 0;
$total_gold_un = 0;
$total_our_un = 0;

# NOTE(review): spelled with a lower-case "l"; the subs below use $numLine,
# which they set themselves.  Nothing in this file reads $numline.
$numline = 1;

# readGold and readOur call each other at every </bitext>, so this single call
# normally consumes both files.
readGold();

# Drain whatever is left of our file once the gold file is exhausted.
readOur();

close(FILEGOLD);
close(FILEOUR);

# Both sums are averaged over the number of sentences that had gold text.
if ($total > 0) {
  $recall /= $total;
  $precision /= $total;
}

# Guard the division: with no scored characters on either side the
# denominator is zero and the original expression aborted the script.
my $denominator = $total_gold * $total_our;
if ($denominator > 0) {
  $accuracy = ($denominator - ($total_gold_un + $total_our_un)) / $denominator;
}
else {
  warn "No aligned text was scored; reporting accuracy 0\n";
  $accuracy = 0;
}

print "$precision $recall $accuracy\n";


# Feed one line of a bitext file into the global parsing state.
#
# Arguments:
#   $myline   - the line to examine (already chomped by the callers)
#   $fileFlag - 1 stores into the gold arrays, anything else into ours
#
# Effects on package globals:
#   * a line matching $articleEnds sets $languageTag and restarts
#     $numSentences at 0, so later text goes to the *Trans arrays;
#   * an alignment tag "alignment N=A B ..." bumps $numSentences, sets
#     $sentenceFlag and, while $languageTag is unset, appends one
#     [N, target] pair per target number to the line array at $numLine;
#   * a line matching $sentEndTag clears $sentenceFlag;
#   * a line that neither starts with $tagOpen nor ends with $tagClose is
#     appended to the current sentence while $sentenceFlag is set.
sub processBitext {
  my ($myline, $fileFlag) = @_;

  if ($myline =~ /$articleEnds/) {
    $languageTag = 1;
    $numSentences = 0;
  }

  if ($myline =~ /$sentStartTag ([0-9]*=[ 0-9]*)/) {
    my $spec = $1;

    if (!$languageTag) {
      # The character class after '=' cannot contain another '=', so a
      # two-field split separates the left number from the target list.
      my ($source, $targets) = split(/$equals/, $spec, 2);
      $source =~ s/$space//g;

      my $lineArray = ($fileFlag == 1) ? \@goldLineArray : \@ourLineArray;

      foreach my $target (split(/$space/, $targets)) {
        $target =~ s/$space//g;

        # Doubled or leading spaces make split() return empty fields.  The
        # old test compared the space-stripped token against a space, which
        # can never match, so those empty fields were stored as pairs.
        next if ($target eq '');

        $lineArray->[$numLine][0] = $source;
        $lineArray->[$numLine][1] = $target;
        $numLine++;
      }
    }

    $sentenceFlag = 1;
    $numSentences++;
  }
  elsif ($myline =~ /$sentEndTag/) {
    $sentenceFlag = 0;
  }

  # Only non-markup lines inside an open sentence carry sentence text.
  if (($myline !~ /^$tagOpen/) && ($myline !~ /$tagClose$/) && $sentenceFlag) {
    my $sentences;
    if ($languageTag) {
      $sentences = ($fileFlag == 1) ? \@goldTrans : \@ourTrans;
    }
    else {
      $sentences = ($fileFlag == 1) ? \@goldEng : \@ourEng;
    }

    $sentences->[$numSentences] .= $myline;
    # Collapse runs of spaces left by joining the physical lines.
    $sentences->[$numSentences] =~ s/$space+/$space/g;
  }
}


# Debug dump of the four sentence arrays, in the order gold English, gold
# translation, our English, our translation.  Every element is printed with
# its index; a blank separator line block is printed between arrays.
sub printArray {
  my @sections = (\@goldEng, \@goldTrans, \@ourEng, \@ourTrans);
  my $remaining = scalar(@sections);

  foreach my $section (@sections) {
    # The package-level $i is used as the counter, as before.
    $i = 0;
    while ($i <= $#{$section}) {
      print " $i    $section->[$i] \n\n\n";
      $i++;
    }

    # Separator goes between sections only, not after the last one.
    $remaining--;
    print(" \n\n\n\n") if ($remaining > 0);
  }
}



# Reset the per-article parsing state: sentence counter and language flag
# back to 0, and the next free alignment slot back to 1.
sub articleInitialize {
  ($numSentences, $languageTag) = (0, 0);
  $numLine = 1;
}

# Discard everything collected from the gold file for the previous bitext
# and restart alignment-slot numbering at 1.
sub initializeBitextGold {
  @$_ = () for (\@goldEng, \@goldTrans, \@goldLineArray);
  $numLine = 1;
}


# Discard everything collected from our file for the previous bitext and
# restart alignment-slot numbering at 1.
sub initializeBitextOur {
  @$_ = () for (\@ourEng, \@ourTrans, \@ourLineArray);
  $numLine = 1;
}


# Read the gold file from the FILEGOLD handle, one line at a time, and feed
# every non-empty line to processBitext with file flag 1.
#
# This sub and readOur call each other: when a gold line matches $bitextEnd,
# control passes to readOur, which in turn calls readGold again after it has
# seen its own $bitextEnd.  The nesting therefore grows with every bitext
# and only unwinds once a handle reaches end of file.
#
# $goldLine is a package global shared by all nested invocations, so after
# readOur() returns it may have been overwritten by a nested readGold.
sub readGold {

  while ($goldLine = <FILEGOLD>)
    {
      
      chomp($goldLine);
      # Skip empty lines.
      next if ($goldLine =~ /^$/);	
      
      # A new bitext starts: forget the gold data of the previous one.
      if ($goldLine =~ /$bitextStart/) {
	initializeBitextGold();
      }
      # The gold side of this bitext is complete: reset the per-article
      # counters and our arrays, then read the matching bitext from our file.
      if ($goldLine =~ /$bitextEnd/){
	articleInitialize();
	initializeBitextOur();
	readOur();
      }
      # Runs for every non-empty line, including the $bitextEnd line once
      # readOur() has returned.
      processBitext($goldLine,1);
    }
  
}

# Read our file from the FILEOUR handle, one line at a time, and feed every
# non-empty line to processBitext with file flag 2.
#
# When a line matches $bitextEnd, both sides of the current bitext have been
# read, so process() scores it; the gold state and per-article counters are
# then reset and readGold() is called to continue with the next gold bitext
# (readGold calls back into readOur, so the two subs recurse into each other).
#
# $ourLine is a package global shared by all nested invocations, so after
# readGold() returns it may have been overwritten by a nested readOur.
sub readOur {
  
  while ($ourLine = <FILEOUR>)
    {
      
      # Note: these two tests run before chomp and before the empty-line check.
      if ($ourLine =~ /$bitextStart/) {
	initializeBitextOur();
      }
      if ($ourLine =~ /$bitextEnd/){
		
	# Score first: process() reads the gold and our arrays that the
	# following calls and the nested reads are about to clear or refill.
	process();
	initializeBitextGold();
	articleInitialize();
	readGold();
	
      }
      
      chomp($ourLine);
      # $temp is only assigned here; this sub never reads it.
      $temp =0;		
      # Skip empty lines.
      next if ($ourLine =~ /^$/);	
      
      processBitext($ourLine,2);
    }
  
}


# Count matching characters between two character lists.
#
# Arguments are two array references.  Both lists are walked in step: equal
# characters advance both positions and count as a match; a mismatch advances
# only the position in @$inner and rewinds the position in @$outer to its
# current restart point (the match counter is not reset).  The scan stops as
# soon as one pass has produced at least one match.  Returns the match count.
sub _countMatchedChars {
  my ($outer, $inner) = @_;
  my ($o, $i, $matched, $restart) = (0, 0, 0, 0);

  while (defined($outer->[$o])) {
    while (defined($inner->[$i]) && defined($outer->[$o])) {
      if ($outer->[$o] eq $inner->[$i]) {
        $o++;
        $i++;
        $matched++;
      }
      else {
        $i++;
        $o = $restart;
      }
    }

    last if ($matched > 0);

    $o++;
    $restart = $o;
  }

  return $matched;
}

# Score the bitext currently held in the gold and our arrays.
#
# For every gold sentence $u (1 .. $#goldEng):
#   1. concatenate the @goldTrans sentences that @goldLineArray pairs with $u;
#   2. consume sentences from @ourEng until the accumulated text is at least
#      as long as the gold sentence, which advances $v;
#   3. concatenate the @ourTrans sentences that @ourLineArray pairs with a
#      number below $v;
#   4. compare the two concatenations character by character and add the
#      result to the package-level sums $total_inter, $total_gold,
#      $total_our, $total_gold_un, $total_our_un, $recall, $precision and
#      $total.
#
# Takes no arguments; trims $goldEng[...] and $ourEng[...] in place.
sub process {
  my $goldIdx = 0;
  my $ourIdx = 1;
  my $ours = "";
  my $v = 1;

  # processBitext stores pairs starting at $numLine, which the initialize
  # subs set to 1, so slot 0 is normally unused.  Starting the scan on an
  # undefined slot made the gold loop below stop immediately for every
  # sentence; step over unused leading slots instead.  If slot 0 is in use
  # the scan still starts there.
  $goldIdx++ while ($goldIdx <= $#goldLineArray && !defined($goldLineArray[$goldIdx]));

  for (my $u = 1; $u <= $#goldEng; $u++) {
    my ($gold_line, $our_line);

    # Bounds are tested first so the defined() check cannot autovivify a
    # slot past the end of the array.
    while ($goldIdx <= $#goldLineArray &&
           defined($goldLineArray[$goldIdx][0]) &&
           $goldLineArray[$goldIdx][0] == $u) {
      $gold_line .= $goldTrans[$goldLineArray[$goldIdx][1]];
      $goldIdx++;
    }

    $goldEng[$u] =~ s/^\s+//;
    $goldEng[$u] =~ s/\s+$//;
    my $pattern = $goldEng[$u];

    # Pull in our sentences until they cover the gold sentence by length,
    # then keep only the surplus for the next gold sentence.
    while (1) {
      if (length($ours) >= length($pattern)) {
        $ours = substr($ours, length($pattern));
        last;
      }
      last if (!defined($ourEng[$v]));

      $ourEng[$v] =~ s/^\s+//;
      $ourEng[$v] =~ s/\s+$//;
      $ours .= " $ourEng[$v]";
      $v++;
    }

    while ($ourIdx <= $#ourLineArray &&
           defined($ourLineArray[$ourIdx][0]) &&
           $ourLineArray[$ourIdx][0] < $v) {
      $our_line .= $ourTrans[$ourLineArray[$ourIdx][1]];
      $ourIdx++;
    }

    my @gold_chars = split(//, $gold_line);
    my @our_chars = split(//, $our_line);

    my $gold_length = scalar(@gold_chars);
    my $our_length = scalar(@our_chars);

    # Scan in both directions and keep the larger overlap.
    my $matched_gold_first = _countMatchedChars(\@gold_chars, \@our_chars);
    my $matched_our_first = _countMatchedChars(\@our_chars, \@gold_chars);
    my $inter_length = ($matched_gold_first > $matched_our_first)
                     ? $matched_gold_first
                     : $matched_our_first;

    $total_inter += $inter_length;
    $total_gold += $gold_length;
    $total_our += $our_length;
    $total_gold_un += ($gold_length - $inter_length);
    $total_our_un += ($our_length - $inter_length);

    # $total counts only sentences with gold text; the caller divides both
    # sums by it.
    if ($gold_length > 0) {
      $recall += ($inter_length / $gold_length);
      $total++;
    }
    if ($our_length > 0) {
      $precision += ($inter_length / $our_length);
    }
  }
}
