-
Notifications
You must be signed in to change notification settings - Fork 0
/
nlp.pl
217 lines (181 loc) · 6.01 KB
/
nlp.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
#!/usr/bin/perl
use strict;
use CGI qw(param);
print "Content-type: text/html\n\n";
my $q = new CGI;
my $string = $q->param('string');
my $style = $q->param('style');
my $rules = lc $q->param('rules');
my $debug = lc $q->param('debug');
print <<EOF;
<html>
<link rel="stylesheet" href="/styles/global.css" type="text/css">
<font face="arial">
<h4>Basic Sentiment analyser</h4>
<form name="input" method="post">
<table>
<tr>
<td>Text to be analysed:</td>
<td><textarea cols=100 rows=4 name="string">$string</textarea></td>
</tr>
<tr>
<td>Analyse text by:</td>
<td title="Sentences are split on a full stop. This is a crude method and words with dots in (such as Mr. or floating point numbers) will fool this. Line break gives a better analysis at the paragraph level"><input type="radio" name="style" value="sentence" checked>Sentence
<input type="radio" name="style" value="linebreak">Line Break
<input type="hidden" name=debug value="$debug">
</td>
</tr>
<tr>
<td colspan=2><input type="submit" value="submit"></td>
</tr>
</table>
</form>
EOF
exit 0 if ( $string eq "" and $rules ne "true" ) ;
# Read in the control file of :
# Positive words
# Negative words
# Words which should be ignored
# Words which boost a positive/negative words. e.g. "really" good
# Words which negate a positive/negative word. e.g. "not" good
my @nlpConf = `cat nlp.conf`;
if ( $#nlpConf == -1 ) {
print "<B>No NLP control file found - unable to perform analysis<\B>";
exit 0;
}
my %nlp;
foreach my $line ( @nlpConf ) {
next if ( $line =~ /^(#\s)/ );
chomp $line;
print "$line <BR>" if ( $rules eq "true" );
( my $type, my $word, my $value ) = split '\s', $line;
$value = 1 if ( $value eq "" );
$nlp{$type}{$word} = $value;
$nlp{'allwords'}{$word} = $word;
}
# Process each message
my @messages;
# Break up input string into chunks based on user preference
if ( $style eq "sentence" ) {
# Flawed assumption that a dot indicates end of sentence.
# Abbreviations, etc, will break this. e.g. Mr. Jones was born 31st Nov. 2001
@messages = split '\.', $string;
}
elsif ( $style eq "linebreak" ) {
@messages = split '\n', $string;
}
my $overallScore = 0; # Scoring for entire input
my $lines = 0;
print <<EOF;
<table border=1>
<tr><th>Sentiment<th>Segment Score<th>Output Message</th></tr>
EOF
# Process each chunk
for ( $a=0; $a <= $#messages; $a++ ) {
my $msg = @messages[$a];
my $msgOut = ""; # Used to build a sanitised output string
chomp $msg;
$msg =~ s/[^A-Za-z0-9 ]//g; # Remove unwanted characters
next if ( $msg eq ""); # Drop blank lines
# Now split into individual words based on whitespace
my @words = split ' ', $msg;
my $pos = 0; # Positive score
my $neg = 0; # Negative score
# Analyse each word
my $prevWord = ""; # Used to store previous "significant" word
for ( my $i=0; $i <= $#words; $i++ ) {
my $word = lc @words[$i];
# Don't worry if this word isn't "significant"
if ( $nlp{'allwords'}{$word} eq "" ) {
$msgOut .= " @words[$i]";
next;
}
my $booster = 1; # Used to boost a word score
# Check if previous word is a booster word and store the multiplier value
if ( $nlp{'booster'}{$prevWord} ) {
$booster = $nlp{'booster'}{$prevWord} ;
}
if ( $debug eq "true" ) {
print "CurrentWord:$word PrevWord:$prevWord PosScore:$nlp{'positive'}{$word} NegScore:$nlp{'negative'}{$word} Negator:$nlp{'negate'}{$prevWord} <BR>";
}
# Analyse the word and basic context
my $sent = "neutral";
# Positive word and previous word not a negating word
if ( $nlp{'positive'}{$word} and ! $nlp{'negate'}{$prevWord} ) {
$pos += ( $nlp{'positive'}{$word} * $booster );
$sent = "positive";
}
# Negative word and previous word is a negating word
elsif ( $nlp{'negative'}{$word} and $nlp{'negate'}{$prevWord} ) {
$pos += ( $nlp{'negative'}{$word} * $booster );
$sent = "positive";
}
# Negative word and previous word not a negating word
elsif ( $nlp{'negative'}{$word} and ! $nlp{'negate'}{$prevWord} ) {
$neg += ( $nlp{'negative'}{$word} * $booster );
$sent = "negative";
}
# Positive word and previous word is a negating word
elsif ( $nlp{'positive'}{$word} and $nlp{'negate'}{$prevWord} ) {
$neg += ( $nlp{'positive'}{$word} * $booster );
$sent = "negative";
}
# Apply some formatting
if ( $sent eq "positive" ) {
$msgOut .= " <font color=GREEN>@words[$i]</font>";
}
elsif ( $sent eq "negative" ) {
$msgOut .= " <font color=RED>@words[$i]</font>";
}
else {
$msgOut .= " @words[$i]";
}
# Replace any unwanted words. e.g. bad language
if ( $nlp{'replace'}{$word} ) {
$msgOut =~ s/$word/$nlp{'replace'}{$word}/;
}
# If current word is "significant", then store for future use
if ( ! $nlp{'allwords'}{$prevWord} ) {
$prevWord = $word; # Store Previous "significant" word
}
}
$lines++;
# Work out overall score
my $result = $pos - $neg;
$overallScore += $result;
# Some more formatting
my $bg = "GREY";
my $fcolor = "WHITE";
my $sentiment = "Neutral";
if ( $result > 0 ) {
$bg = "GREEN";
$sentiment = "Positive";
}
elsif ( $result < 0 ) {
$bg = "RED";
$sentiment = "Negative";
}
print <<EOF;
<tr><td style="background-color:$bg"><font style="color:$fcolor;">$sentiment</font></td><td>$result</td><td>$msgOut</td></tr>
EOF
}
my $averageScore = $overallScore / $lines;
print " </table><BR><BR>";
my $bg = "GREY";
my $fcolor = "WHITE";
my $sentiment = "Neutral";
if ( $overallScore > 0 ) {
$bg = "GREEN";
$sentiment = "Positive";
}
elsif ( $overallScore < 0 ) {
$bg = "RED";
$sentiment = "Negative";
}
print <<EOF;
<table border=1>
<tr><th colspan=2>Overall Sentiment</th><th>Average Score</th></tr>
<tr><td style="background-color:$bg;"><font style="color:$fcolor;">$sentiment</font></td><td title="Overall score for document">$overallScore</td><td title="Total score divided by # lines">$averageScore</td></tr>
</table>
EOF
exit 0;