#!/bin/csh

# Output bigram features based on bsp.
#
# Pipeline (each stage is an external script that must be found on the path):
#   1. count.pl      counts bigrams in <task>-training.count
#                    -> <task>-training.gram2.cnt and <task>-training.gram2.hist
#   2. statistic.pl  scores the counted bigrams with <stat>.pm
#                    -> <task>-training.gram2.<stat>
#   3. bsp2regex.pl  converts the scored bigrams to regular expressions
#                    -> <task>-training.gram2.regex
#
# All eight arguments are required.  Every expansion below is double-quoted
# so that an argument containing blanks or glob characters is passed through
# as a single, literal word.  The script stops with exit status 1 as soon as
# one stage reports failure, instead of feeding missing or partial output to
# the next stage.

if ($#argv != 8) then
   echo "Usage: task freq window stat score rank stoplist token"
   exit 1
endif

# parameters for bsp and sensetools

set task="$1"              # task name; prefix of every input/output file
set freq="$2"              # frequency cut-off (exclude bigrams < freq)
set window="$3"            # window size for bigram counting (2 is default)
set stat="$4"              # statistic to be computed ($stat.pm)
set score="$5"             # cutoff value for statistic
                           #   ll and mi use "raw" score
                           #   leftFisher uses p-value
set rank="$6"              # value handed to statistic.pl --rank
set stoplist="$7"          # file handed to count.pl --stop
set token="$8"             # file handed to --token (count.pl and bsp2regex.pl)

# common prefix of all files generated by this script
set base="${task}-training.gram2"

# clean up old files, so stale results never survive a failed run

rm -f "${base}.cnt"
rm -f "${base}.${stat}"
rm -f "${base}.hist"
rm -f "${base}.regex"

# newLine - don't count bigrams across end of line - needed for senseval

count.pl --extended --newLine --window "$window" --histogram "${base}.hist" --token "$token" --stop "$stoplist" "${base}.cnt" "${task}-training.count"
if ($status != 0) then
   echo "count.pl failed in esp-gram2 $task"
   exit 1
endif

echo "count in esp-gram2 $task"

statistic.pl --extended --rank "$rank" --frequency "$freq" --score "$score" "${stat}.pm" "${base}.${stat}" "${base}.cnt"
if ($status != 0) then
   echo "statistic.pl failed in esp-gram2 $task"
   exit 1
endif

echo "statistic in esp-gram2 $task"

bsp2regex.pl "${base}.${stat}" --token "$token" > "${base}.regex"
if ($status != 0) then
   echo "bsp2regex.pl failed in esp-gram2 $task"
   exit 1
endif

echo "bsp2regex in esp-gram2  $task"
