# edge.pm version 0.13
# (Updated 07/08/2002 -- Sid)
#
# Semantic Distance Measure package implementing the baseline 
# edge counting semantic distance measure.
#
# Copyright (c) 2002,
# Siddharth Patwardhan, University of Minnesota, Duluth
# patw0006@d.umn.edu
# Ted Pedersen, University of Minnesota, Duluth
# tpederse@d.umn.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to 
#
# The Free Software Foundation, Inc., 
# 59 Temple Place - Suite 330, 
# Boston, MA  02111-1307, USA.


package edge;

require Exporter;

@ISA = qw(Exporter);

@EXPORT = qw(initialize distance allDistances $errorString $mod_posConsidered);

# Include my "complex" interface to WordNet (through QueryData)
use MyQD;

# Local Variables.
my $wn;               # A copy of the WordNet QueryData interface object
my $trace;            # Flag for 'verbose' mode.

# Initialization subroutine ... should be called as soon as the
# package is imported, before using any of the other functions.
# INPUT PARAMS  : $wn .. The WordNet QueryData interface object.
# RETURN VALUES : None.
sub initialize
{
    # WordNet QueryData object from main program.
    $wn = shift;
    
    # Trace option ... from main program.
    $trace = shift;

    # Variable exported to main program to communicate error messages.
    $errorString = "No Error.";

    # Variable exported to the main program to communicate the various 
    # parts of speech considered in this measure.
    $mod_posConsidered = "n";
}

# The Edge-counting distance measure subroutine ...
# INPUT PARAMS  : $word1    .. one of the two words.
#                 $word2    .. the second word of the two whose 
#                              semantic distance needs to be measured.
# RETURN VALUES : $distance .. the semantic distance between the two
#                              words.
sub distance
{
    my $word1;
    my $word2;
    my $offset1;
    my $offset2;
    my $selOffset1;
    my $selOffset2;
    my $dist;
    my $minDist;
    my @offsets1;
    my @offsets2;

    $word1 = shift;
    $word2 = shift;

    # Check if either of the words is not found in WordNet.
    if(!$word1 || !$word2)
    {
	$errorString = "Word not defined in WordNet.";
	return undef;
    }

    # Get the offsets of all the synsets of <Word1> ... 
    @offsets1 = &MyQD_getNounSynsetOffsets($word1);

    # Get the offsets of all the synsets of <Word2> ... 
    @offsets2 = &MyQD_getNounSynsetOffsets($word2);

    # [trace]
    if($trace)
    {
	print "'$word1' synsets: \n";
	foreach $offset1 (@offsets1)
	{
	    print "  ";
	    &MyQD_printSet($offset1);
	    print ": ";
	    &MyQD_printSynset($offset1);
	    print "\n";
	}
	print "\n'$word2' synsets: \n";
	foreach $offset2 (@offsets2)
	{
	    print "  ";
	    &MyQD_printSet($offset2);
	    print ": ";
	    &MyQD_printSynset($offset2);
	    print "\n";
	}
	print "\n";
    }
    # [/trace]
    
    # For each offset1-offset2 pair calculate the distance value for
    # each pair ... select the pair with the smallest distance.
    $minDist = 65535;
    foreach $offset1 (@offsets1)
    {
	foreach $offset2 (@offsets2)
	{
	    $dist = &senseDistance($offset1, $offset2);
	    return undef if($dist == -1);
	    if($dist < $minDist)
	    {
		$selOffset1 = $offset1;
		$selOffset2 = $offset2;
		$minDist = $dist;
	    }
	}
    }
    
    # [trace]
    if($trace)
    {
	print "Selected synset ($word1): ";
	&MyQD_printSet($selOffset1);
	print "\nSelected synset ($word2): ";
	&MyQD_printSet($selOffset2);
	print "\n\n";
    }
    # [/trace]

    return $minDist;
}

# The Simple edge-counting all-distances subroutine ...
# Calculates and returns distances between every pair of synsets of
# two given words.
# INPUT PARAMS  : $word1     .. one of the two words.
#                 $word2     .. the second word of the two whose 
#                               semantic distances needs to be measured.
# RETURN VALUES : %distances .. a hash of hashes with the semantic distances
#                               every pair of synsets of the two words.
sub allDistances
{
    my $word1;
    my $word2;
    my $offset1;
    my $offset2;
    my @offsets1;
    my @offsets2;
    my %returnHash;

    $word1 = shift;
    $word2 = shift;

    # Checks if either of the two words is not in WordNet.
    if(!$word1 || !$word2)
    {
	$errorString = "Word not defined in WordNet.";
	return undef;
    }

    # Get the offsets of all the synsets of <Word1> ... 
    @offsets1 = &MyQD_getNounSynsetOffsets($word1);

    # Get the offsets of all the synsets of <Word2> ... 
    @offsets2 = &MyQD_getNounSynsetOffsets($word2);
    
    # [trace]
    if($trace)
    {
	print "'$word1' synsets: \n";
	foreach $offset1 (@offsets1)
	{
	    print "  ";
	    &MyQD_printSet($offset1);
	    print ": ";
	    &MyQD_printSynset($offset1);
	    print "\n";
	}
	print "\n'$word2' synsets: \n";
	foreach $offset2 (@offsets2)
	{
	    print "  ";
	    &MyQD_printSet($offset2);
	    print ": ";
	    &MyQD_printSynset($offset2);
	    print "\n";
	}
	print "\n";
    }
    # [/trace]

    # For each offset1-offset2 pair calculate the distance value for
    # each pair ... select the pair with the smallest distance.
    %returnHash = ();
    foreach $offset1 (@offsets1)
    {
	foreach $offset2 (@offsets2)
	{
	    $returnHash{$offset1}{$offset2} = &senseDistance($offset1, $offset2);
	    return undef if($returnHash{$offset1}{$offset2} == -1);
	}
    }

    return {%returnHash};
}

# Subroutine that finds the edge-count between two noun synsets. The synsets 
# of other parts of speech are not considered. The subroutine takes as input 
# two WordNet synset offsets and returns the distance between them.
# INPUT PARAMS:  $offset1, $offset2   .. The synset offsets to determine
#                                        the distance between.
# RETURN PARAMS: $distance  .. Edge-count between the synsets.
sub senseDistance
{
    my $offset;
    my $lOffset;
    my $rOffset;
    my $lTree;
    my $rTree;
    my $lCount;
    my $rCount;
    my $leastCommonSubsumer;
    my $minDist;
    my @lTrees;
    my @rTrees;

    # Get the offsets.
    $lOffset = shift;
    $rOffset = shift;

    # Get the hypernym trees.
    @lTrees = &MyQD_getHypernymTrees($lOffset);
    foreach $lTree (@lTrees)
    {
	push(@{$lTree}, $lOffset);
    }

    # Get the hypernym trees.
    @rTrees = &MyQD_getHypernymTrees($rOffset);
    foreach $rTree (@rTrees)
    {
	push(@{$rTree}, $rOffset);
    }

    # [trace]
    if($trace)
    {
	foreach $lTree (@lTrees)
	{
	    print "HyperTree: ";
	    &MyQD_printSet(@{$lTree});
	    print "\n";
	}
	foreach $rTree (@rTrees)
	{
	    print "HyperTree: ";
	    &MyQD_printSet(@{$rTree});
	    print "\n";
	}
    }
    # [/trace]

    # Find the smallest path in these trees.
    $minDist = 1000;
    foreach $lTree (@lTrees)
    {
	foreach $rTree (@rTrees)
	{
	    $leastCommonSubsumer = &MyQD_getLCSfromTrees($lTree, $rTree);
	    $lCount = 0;
	    foreach $offset (reverse @{$lTree})
	    {
		$lCount++;
		if($offset == $leastCommonSubsumer)
		{
		    last;
		}
	    }
	    $rCount = 0;
	    foreach $offset (reverse @{$rTree})
	    {
		$rCount++;
		if($offset == $leastCommonSubsumer)
		{
		    last;
		}
	    }
	    if($rCount + $lCount - 2 < $minDist)
	    {
		$minDist = $lCount + $rCount - 2;
		$LCSOffset = $leastCommonSubsumer;
	    }
	}
    }
    
    # [trace]
    if($trace)
    {
	print "LCS: ";
	&MyQD_printSet($leastCommonSubsumer);
	print "  Path length: $minDist.\n\n";
    }
    # [/trace]

    if($minDist >= 0)
    {
	return $minDist;
    }
    else
    {
	$errorString = "Internal Error.";
	return -1;
    }
}

1;

