#!/bin/csh

#######################################################################
# for use with weka classifier zeror, which implements a simple
# majority classifier that only determines the most frequent
# sense of a word in the training data, and assigns that to
# every instance in the test data
#######################################################################

# Exactly three arguments are expected: the source directory, the stop
# list and the token definition file.  With any other argument count,
# print the usage text and leave with status 1.

switch ($#argv)
case 3:
	breaksw
default:
	echo "zeror source-dir stoplist token"
	echo "run from directory where these files reside"
	exit 1
endsw

# Work out the stop list and token definition file names.
# Both are turned into full path names, because the word loop further
# down runs the tools from inside the per-word directories, where a
# name relative to the starting directory would no longer resolve.
# A name that already starts with / is used as given; anything else is
# taken relative to the directory the script was started in.  The shell
# variable cwd is used rather than the inherited environment variable
# PWD, since cwd is maintained by the shell itself.

set sourcedir = "$1"

if ("$2" =~ /*) then
	set stoplist = "$2"
else
	set stoplist = "$cwd/$2"
endif

if ("$3" =~ /*) then
	set token = "$3"
else
	set token = "$cwd/$3"
endif

# The methods are feature extraction routines that build views of the
# text for the machine learning system weka.
#
# For every method a fresh results directory named after the method is
# created in the starting directory.  For every word, the word directory
# is copied from the source directory into the method directory and the
# pipeline (feature extraction, arff conversion, weka run, scoring) is
# run from inside that copy.

# Remember the starting directory, so that paths can be resolved from
# here and the loop can always come back to a known place.

set topdir = "$cwd"

# Stop before deleting any earlier results when there is nothing to
# process.

if (! -d "$sourcedir") then
	echo "zeror: source directory $sourcedir not found"
	exit 1
endif

foreach method (zeror-f)

	# the results for each method are contained in a directory,
	# which has the same name as the method; start from a clean one

	rm -fr "$method"
	mkdir "$method"

	# within each method directory, there is a directory for
	# each word

## Spanish senseval-2 data

	foreach word ( apoyar      claro       gracia      organo  tocar apuntar     clavar      grano  partido     autoridad   conducir actuar  hermano     pasaje      tratar   bomba       copiar      local       popular     usar   brillante   corazon     masa        programa   vencer  canal       corona      natural     saltar      verde   ciego       coronar     naturaleza  simple      vital  circuito    explotar    operacion   tabla )

## senseval-2 data

##foreach word ( art.n authority.n bar.n begin.v blind.a bum.n call.v carry.v  chair.n channel.n child.n church.n circuit.n collaborate.v  colourless.a cool.a day.n detention.n develop.v draw.v dress.v drift.v drive.v dyke.n face.v facility.n faithful.a fatigue.n feeling.n ferret.v find.v fine.a fit.a free.a graceful.a  green.a grip.n hearth.n holiday.n keep.v lady.n leave.v live.v local.a match.v material.n mouth.n nation.n natural.a nature.n oblique.a play.v post.n pull.v replace.v restraint.n see.v sense.n serve.v simple.a solemn.a spade.n stress.n strike.v train.v treat.v turn.v use.v vital.a wander.v wash.v  work.v yew.n )

## senseval-1 data

## foreach word ( accident-n consume-v  scrap-n amaze-v invade-v scrap-v band-p derive-v knee-n  seize-v  behaviour-n  modest-a shake-p bet-n   excess-n onion-n shirt-n  bet-v   float-n  promise-n  slight-a  bitter-p    float-v promise-v  bother-v  floating-a  wooden-a  brilliant-a generous-a  sack-n bury-v giant-a   sack-v   calculate-v   giant-n   sanction-p )

		# copy the text for a word into the method directory; this
		# is done from the starting directory, so the source
		# directory may be nested or given as a full path name

		cp -r "$sourcedir/$word" "$method"
		cd "$method/$word"

		# now process that text with the desired method

		$method "$word" "$stoplist" "$token"

		# convert the text into a form that weka likes (arff)

		xml2arff "$word"

		# now run weka to do machine learning and tag the test data

		wekarun "$word" ZeroR ' '

		# score your results with senseval 2 scoring program
		# note that weka will provide some diagnostic output too

		score-word "$word" ZeroR

		# get out of this word directory and move to the next!
		# the full path is used so the return does not depend on
		# how deep the current directory is

		cd "$topdir"

	end
end

# ZeroR is a simple majority classifier (see the header of this
# script), not a Naive Bayesian one: it assigns the most frequent
# sense found in the training data to every test instance.
#
# Once every word has been processed, run ensembleByDist.pl with the
# argument fz and then run score with the argument ZeroR-fz.
# NOTE(review): presumably fz names the feature set produced by the
# zeror-f method above and ZeroR-fz the combined result - confirm
# against ensembleByDist.pl and score.

ensembleByDist.pl fz

score ZeroR-fz


