#!/usr/bin/perl -w

# this is the script used by ted pedersen in the i2b2 2011 suicide notes challenge
# it uses simple manually developed rules to assign emotions
# it will assign at most $taglimit emotions per sentence
#
# this script attained performance of : 
# F1 = .45269, Precision = .45985, Recall = .44575, N = 1,233
# taglimit was set to 2

# run this script as follows : 
# suicide-rules-final.pl test-data/*.txt 

# Contact info : tpederse@d.umn.edu http://www.d.umn.edu/~tpederse
# written july 2011, approx 40 hours development and testing time
# you can find this script at : 
# http://www.d.umn.edu/~tpederse/Code/suicide-rules-final.pl

# ------------------------------------------------------------------------
# Copyright (c) Ted Pedersen, 2011
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
# ------------------------------------------------------------------------

# rules are checked based on frequency order in training data

#    820 instructions
#    455 hopelessness
#    296 love
#    295 information
#    208 guilt
#    107 blame
#     94 thankfulness
#     69 anger
#     51 sorrow
#     47 hopefulness
#     25 happiness_peacefulness
#     25 fear
#     15 pride
#      9 abuse
#      6 forgiveness

# how many emotion tags do we allow per sentence?

$taglimit = 2;

# keep track of lines per file

$linecount = 1;

while (<>)  {

	chomp;

	# reset the counter for the number of emotions that have been assigned

	$tagcount = 0;

	# get a word count without compounding, save original form of input

	@rawwords = split;
	$wordcount = $#rawwords;

	# apply rules to assign emotions

	# frequency order

	&instructions;
	&hopelessness;
	&love;
	&information;
	&guilt;
	&blame;
	&thankfulness;
	&anger;
	&sorrow;
	&hopefulness;
	&happiness_peacefulness;
	&fear;
	&pride;
	&abuse;
	&forgiveness;

	$linecount++;
	
}

# --------------------------------------------------------------

sub instructions {
		
	if (

		# polite request

		/

		## (please|kindly)|

		kindly|if\ you\ (could|would)|

		# complete business

		\bpay|financial|collect|cash|deposit|deliver|\bowe\b|\bdebt|obligation|	
		return(ed)?\ to|to\ be\ destroyed|\bsend\bmail|

		# notifications

		(inform|\btell|notify|\bcall)\ (john|mary|everyone|every\ one|him|her)|	
		please\ (tell|notify|\bcall)|

		# phone numbers

		9999|				

		# don't do something

		(do\ not|dont|do\ n't)\ (allow|mourn|\bpay\b|grieve|\bmiss\b|forget)|

		# disposestion of possessions

		(should|will|to)\ be\ given|
		(should|will|to)\ go\ to|
		(left|leave|\bgave|\bgive)\ every\ ?thing|
		(left|leave|\bgave|\bgive|\bgo)\ to|
		(left|leave|\bgave|\bgive)\ (my|\bhim\b|\bher\b|\bthem\b)|
		every(\ )?thing\ left|
		\bi\ (hereby\ )?(leave|give|bequeath)|
		i\ am\ giving|
		wordly\ (goods|possesions)|
		(personal|my)\ (belongings|effects|things|possession)|
		(you|he|she|they)\ will\ have|

		# funeral arrangements
		
		cremat(e)?(d)?|\bbur(y|ied)|\bburn\b|\bbody\b|burial|
		\bgrave|my\ (remains|ashes|bones)|funeral|casket|flowers|suit|dress|

		# wishes

		(ask|want|wish)\ (you|her|him|them)|
		(ask|want|wish)\ that|
		all\ i\ (ask|wish|want)|
		(find|take|put)\ my|
		see\ that|
		take\ care\ that|
		you\ must|
		do\ (all|what)\ you|
		will\ need\ all\ the\ (help|support)|

		# last will and testament

		last\ will|being\ of\ sound\ mind|testament|last\ wish(es)?|

		# remember me, care for others

		think\ of\ me|take\ care\ of|look\ after\ (him|her|your)|

		# start of enumerated list of instructions

		^\d+\ \.

		# end of regex - allow comments, make matches case insensitive

		/ix )

		{

		$class= 'instructions';
		&printline;
	}

} 

# --------------------------------------------------------------------------------------------------------------------

sub hopelessness {

	if (/

		# without you 

		(live|life|be)\ without\ you|

		# current state unbearable

		live\ like\ this|
		(ca\ n\'t|cant|can\ not|could\ n't|couldnt|could\ not)\ (work|stand|go(\ on)?|bear|endure|\blive|\bface|\btolerate|fight|keep|suffer)|
		anymore|(no|any)\ longer|
		at\ a\ loss|
		unbearable|

		# want to go away

		be\ no\ more|going\ away|go\ away|way\ out|

		# only solution

		(no\ other|only|best)\ (way|thing)|
		tired\ of|
		ve\ tried|
		this\ is\ (the\ )?best|
		tried\ so\ hard|
		too\ late|

		# worthless

		(is\ n\'t|wo\ n\'t\ be|not)\ worth|
		unworthy|
		worthless|
		burden|
		i\ am\ no\ longer|
		i\ ca\ n't\ do\ anything|

		# alone

		alone|lonely|lone?liness|

		# hate

		i\ hate\ myself|
		hate\ my\ life|

		# final note

		this\ is\ (my\ last\ message)|(the\ last\ (you|time))|

		# ailments

		too\ old|\bill\b|illness|incurabl(y|e)|sick|	

		# conditions 

		blind|deaf|crippled|arthritis|cancer|epilepsy|diabet(es|ic)|depress|insan(e|ity)|overdose|alcoholic|addict|
		day\ in\ and\ day\ out|
		day \after \day|	

		# symptoms

		\bweak|tired|\bpain|sick|suffer|despair|agony|torment|poverty|misery|miserable|exhaust|terrible|

		# hopeless

		hopeless|(lost|without|no)\ hope|no\ desire|

		# in hell

		(mental|living)\ hell|hell\ on\ earth

		/ix) {

		$class= 'hopelessness';
		&printline;
	}

} 

# --------------------------------------------------------------------------------------------------------------------

sub love {

	if (/

		# dedication

		devotion|only\ person|soul\ mate|forever\ yours|always\ yours|i\ will\ always\ love|

		# expressions

		i\ love\ (my|you)|i\ love|loves?\ you|my\ love|your\ loving|i\ adore|

		# greetings

		love\ to|
		yours\ forever|
		yours\ to\ the\ end|
		yours\ always|

		# endearments

		my\ (darling|sweetheart)

		/ix) {

		$class= 'love';
		&printline;
	}

}

# --------------------------------------------------------------------------------------------------------------------

sub information {

	if (/

		# official documents 

		polic(y|ies)|\bloan|insurance|papers|taxes|	

		# locations

		hospital|

		# information 

		information|

		# where things are

		[^if]\ there\ (is\ a|are)|you\ will\ find|

		# state of life

		i\ owe|(check|address)\ book|savings|loan|papers|policy|policies|

		# money 
		
		\$|\d+\.\d+|

		# phone 

		9999

		/ix) {

		$class= 'information';
		&printline;
	}
	

} 
# --------------------------------------------------------------------------------------------------------------------

sub guilt {

	if (/

		forgive\ me|(am|'m)\ sorry|god\ forgive|

		hate\ myself|

		i\ hate\ to\ leave|

		making\ you\ unhappy|
		causing\ (constant|a\ lot\ of|endless|much)?\ (unhappiness|misery|pain)|

		always\ wrong

		/ix) {

		$class= 'guilt';
		&printline;
	}


} 

# --------------------------------------------------------------------------------------------------------------------

sub blame {

	if (/

		# blame self 

		nobody\ else\ is\ to\ blame|i\ am\ to\ blame|my\ fault|

		# blame others

		you\ (told|promised)\ me|you\ (lied|cheated)|better\ to\ us|criticized
		you\ have\ done|
		you\ think\ you\ are|

		# misfortune

		it\ was\ my\ (problem|misfortune|bad)

		/ix) {

		$class= 'blame';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub thankfulness {

	if (/

		# expressions of thanks

		appreciat(e|ion)|gratitude|thanks?|

		# gratitude 

		you\ (were|was)\ (always\ )?(kind|nice|good)
		been\ so\ (nice|good|kind)

		/ix) {

		$class= 'thankfulness';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub anger {

	if (/

		# revenge

		punish|divorce|you\ think\ you\ are|

		# feelings 

		bitterly|not\ all\ that\ great|sick\ and\ tired|

		# bad deals

		(raw(er)?|rotten(er)?|bad|worst)\ deal|double crossed|

		# your fault

		are\ to\ blame|you\ have\ not|nothing\ but\ (trouble|grief)|as\ usual\ you|bet\ you\ (ca\ n't|cant)|

		#insults

		\bhog\b|\bpig\b|bastard|dirty|low\ down|\bgang|grafter|whore|unfaithful|bastard|lying|liar|cheater

		/ix) {

		$class= 'anger';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub sorrow {

	if (/

		#feelings

		sorry|sorrow|lonely|

		# sadness at departure

		i\ hate\ to\ leave|

		# aloneness

		i\ have\ (nothing|nobody)

		/ix) {

		$class= 'sorrow';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub hopefulness {

	if (/

		#rest 

		find\ peace|

		# hope for others

		life\ ahead|worth\ saving

		/ix) {

		$class= 'hopefulness';
		&printline;
	}


} 
# --------------------------------------------------------------------------------------------------------------------

sub happiness_peacefulness {

	if (/peaceful|beautiful|happiness|happily|reality|will now stop/i) {
		$class= 'happiness_peacefulness';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub fear {

	if (/afraid|fear|scared|terror/i) {
		$class= 'fear';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub pride {

	if (/proud|pride|apple of my eye/i) {
		$class= 'pride';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub abuse {

	if (/awful|unbearable|swears|curses/i) {
		$class= 'abuse';
		&printline;
	}

} 
# --------------------------------------------------------------------------------------------------------------------

sub forgiveness {

	if (/i forgive|forgive (him|her|you)/i) {
		$class= 'forgiveness';
		&printline;
	}
} 
# --------------------------------------------------------------------------------------------------------------------

sub printline {

		if ($tagcount < $taglimit) {
			print "c=\"@rawwords\" $linecount:0 $linecount:$wordcount\|\|e=";
			print "\"$class\"\n";
			$tagcount++;
		}
}


