#!/usr/local/bin/perl -w

# CS 8995 - Corpus Based Natural Language Processing

# Spring 2001

# Dr. Ted Pedersen

# Final Project: Empirical Methods for Multilingual Text
#####

# Stage 1

# Team Morelia

#   Steve Holtz
#   Jayanta Nath
#   Srinivas Vadrevu

# Evaluation routine
#####

use utf8;

use strict;
use warnings;

# Overview
# --------
# Compares a candidate alignment file against a gold-standard alignment
# file and prints one line, "F-measure: <value>".
#
# Input format, as far as this parser is concerned (all tags are
# recognised only at the very start of a line):
#   <bitext ...>    the next <article ...> is the first-language article
#   <article ...>   opens an article; any article that is not the first
#                   one after <bitext is treated as second-language
#   <align ...>     opens a segment; the first run of digits in the tag is
#                   the segment id, the next space-separated run of
#                   numbers is the list of segment ids it is linked to
#   </align ...>    closes the segment; every non-tag, non-empty line
#                   since <align is the segment text (lines are joined
#                   without their newline)
#
# Metric: every link (first-language id, second-language id) listed on a
# second-language segment is weighted by
#   length(first-language text) * length(second-language text).
#   recall    = weight of links present in both files / gold weight
#   precision = weight of links present in both files / candidate weight
#   F         = 2 * recall * precision / (recall + precision)
# The handles are opened without an encoding layer, so length() counts
# whatever units the handle yields (bytes unless the environment says
# otherwise).
# NOTE(review): if character counts are wanted for UTF-8 input, open the
# files with ':encoding(UTF-8)' -- confirm the intended unit first.

# read_alignment_file($path)
#
# Parses one alignment file and returns a hash reference with:
#   len1  => { first-language segment id  => text length }
#   len2  => { second-language segment id => text length }
#   links => { second-language segment id => [ linked first-language ids ] }
#   area  => total weight of all links in the file
# Segment ids are stored numerically, so "01" and "1" name the same segment.
# Dies if the file cannot be opened or an <align>/</align> pair is unusable.
sub read_alignment_file {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "ERROR: cannot open this file: $path\n";

    my ( %len1, %len2, %links );
    my $area          = 0;
    my $expect_first  = 0;     # true from <bitext until the first </article
    my $in_first_lang = 0;     # true while inside the first-language article
    my $id;                    # id of the segment currently open, if any
    my $text = '';             # text collected for the open segment

    while ( my $line = <$fh> ) {
        chomp $line;
        next if $line eq '';

        if ( $line =~ /^<bitext/ ) {
            $expect_first = 1;
            next;
        }
        if ( $line =~ /^<article/ ) {
            $in_first_lang = 1 if $expect_first;
            next;
        }
        # Closing tags are never segment text.  Previously only the first
        # </article was skipped; the second one (and </bitext) was appended
        # to the text buffer and inflated the first segment of a following
        # bitext.
        if ( $line =~ /^<\/article/ ) {
            $expect_first  = 0;
            $in_first_lang = 0;
            next;
        }
        next if $line =~ /^<\/bitext/;

        if ( $line =~ /^<align/ ) {
            # First number: segment id.  Next number list: linked ids.
            my ( $raw_id, $raw_links ) =
                $line =~ /(\d+)(?:\D+(\d[ \d]*))?/;
            die "ERROR: <align> tag without a segment id in $path line $.\n"
                unless defined $raw_id;

            # A tag with a single number used to end up linked to its own
            # id (the second match failed and the first result was reused);
            # that fallback is kept, but made explicit.
            $raw_links = $raw_id unless defined $raw_links;

            $id = $raw_id + 0;
            $links{$id} = [ map { $_ + 0 } split ' ', $raw_links ]
                unless $in_first_lang;
            next;
        }

        if ( $line =~ /^<\/align/ ) {
            die "ERROR: </align> without <align> in $path line $.\n"
                unless defined $id;

            if ($in_first_lang) {
                $len1{$id} = length $text;
            }
            else {
                $len2{$id} = length $text;

                # Links to unknown first-language segments weigh nothing.
                my $linked_length = 0;
                for my $id1 ( @{ $links{$id} } ) {
                    $linked_length += $len1{$id1} if defined $len1{$id1};
                }
                $area += $linked_length * $len2{$id};
            }
            $text = '';
            undef $id;
            next;
        }

        $text .= $line;
    }

    close $fh;

    return {
        len1  => \%len1,
        len2  => \%len2,
        links => \%links,
        area  => $area,
    };
}

# shared_area($gold, $candidate)
#
# Takes two structures returned by read_alignment_file and returns the
# total weight of the links that appear in both.  A link is shared when
# the same second-language id lists the same first-language id in both
# files.  Weights use the candidate's segment lengths, exactly as its
# {area} does, so comparing a file with itself yields its own {area}.
sub shared_area {
    my ( $gold, $candidate ) = @_;

    my $shared = 0;
    for my $id2 ( sort { $a <=> $b } keys %{ $candidate->{links} } ) {
        my $gold_ids = $gold->{links}{$id2};
        next unless $gold_ids;

        my %in_gold = map { $_ => 1 } @{$gold_ids};
        my $length2 = $candidate->{len2}{$id2} || 0;

        for my $id1 ( @{ $candidate->{links}{$id2} } ) {
            next unless $in_gold{$id1};
            $shared += ( $candidate->{len1}{$id1} || 0 ) * $length2;
        }
    }
    return $shared;
}

# f_measure($shared, $gold_area, $candidate_area)
#
# Returns the harmonic mean of recall ($shared / $gold_area) and precision
# ($shared / $candidate_area).  A zero denominator anywhere yields 0
# instead of an "Illegal division by zero" death.
sub f_measure {
    my ( $shared, $gold_area, $candidate_area ) = @_;

    my $recall    = $gold_area      ? $shared / $gold_area      : 0;
    my $precision = $candidate_area ? $shared / $candidate_area : 0;
    my $sum       = $recall + $precision;

    return $sum ? 2 * $recall * $precision / $sum : 0;
}

# main(@args)
#
# Command-line entry point: expects the gold file and the file to be
# evaluated, in that order, and prints the F-measure.
sub main {
    my @args = @_;

    die "usage: eval.pl gold-input.utf non-gold-input.utf\n"
        if @args != 2;

    my $gold      = read_alignment_file( $args[0] );
    my $candidate = read_alignment_file( $args[1] );

    my $Fmeasure = f_measure( shared_area( $gold, $candidate ),
        $gold->{area}, $candidate->{area} );

    print "F-measure: $Fmeasure\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (require/do) without side effects.
main(@ARGV) unless caller;
