1 '''
2 Created on 29/01/2013
3
4 @author: David Vilares Calvo
5 '''
6
7 from miopia.classifier.ClassifierI import ClassifierI
8 from miopia.classifier.PolarityType import PolarityType
9 from miopia.classifier.WekaClassificationStrategy import WekaClassificationStrategy
10 from miopia.util.exceptions.InvalidStrategyException import InvalidStrategyException
11
12 from miopia.analyzer.SentimentAnalyzer import SentimentAnalyzer
13 import time
15 '''
A wrapper for the L{ClassifierI}.
@todo: Only wraps L{WekaClassificationStrategy} at the moment
18 '''
19 MAX_JOINT_FILES = 30000
20
21
22
def __init__(self, classification_strategy,
             list_handlers=None, polarity_type_handled=None):
    """
    Store the classification strategy and the optional handler classifiers.

    @param classification_strategy: An instance of L{WekaClassificationStrategy}
    @param list_handlers: A list with your L{SimpleClassifier} which are your
        handler classifiers. None or an empty list if there is no handler
        classifier
    @param polarity_type_handled: A value of L{PolarityType}. None if there
        is no handler
    """
    self._strategy = classification_strategy
    # Build a fresh list when no handlers are given: a literal [] default is
    # created once and would be shared by every instance that relies on it.
    self._list_handlers = [] if list_handlers is None else list_handlers
    self._polarity_type_handled = polarity_type_handled
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
72 """
73 Merge list_a and list_b prioritizing values of list_b
74 @param list_a: list of tuples id, L{PolarityType}
75 @param list_b: list of tuples id, L{PolarityType}
76 """
def rules(list_polarities_confidence):
    """
    Collapse the candidate polarities of one instance into a single one.

    @param list_polarities_confidence: list of tuples
        (L{PolarityType}, confidence). If a polarity appears more than
        once, the confidence of its last occurrence is kept.
    @return: A tuple (L{PolarityType}, confidence)
    """
    # Read the parameter itself rather than a variable of the calling scope.
    dict_polarities_confidence = dict(list_polarities_confidence)
    set_polarities = set(dict_polarities_confidence)

    if not set_polarities:
        return PolarityType.OTHER, 0.
    if len(set_polarities) == 1:
        aux = set_polarities.pop()
        return aux, dict_polarities_confidence[aux]

    has_positive = (PolarityType.POSITIVE in set_polarities or
                    PolarityType.STRONG_POSITIVE in set_polarities)
    has_negative = (PolarityType.NEGATIVE in set_polarities or
                    PolarityType.STRONG_NEGATIVE in set_polarities)
    if has_positive and has_negative:
        # Contradictory candidates are resolved as NEUTRAL. The confidence
        # is the mean of the candidates' confidences, computed without
        # mutating the set while it is being traversed.
        # NOTE(review): the mean is the assumed intent -- confirm.
        total_confidence = sum(dict_polarities_confidence.values(), 0.)
        return (PolarityType.NEUTRAL,
                total_confidence / len(dict_polarities_confidence))

    # No contradiction: the first polarity present, in this priority order.
    for polarity in (PolarityType.POSITIVE, PolarityType.STRONG_POSITIVE,
                     PolarityType.NEGATIVE, PolarityType.STRONG_NEGATIVE):
        if polarity in set_polarities:
            return polarity, dict_polarities_confidence[polarity]

    # Always return a (polarity, confidence) pair: the caller indexes both.
    return PolarityType.NONE, 0.
111
112 list_id_fixed_polarity = []
113 for (id, list_polarities) in list_id_polarities:
114 aux_tuple = rules(list_polarities)
115 list_id_fixed_polarity.append((id, aux_tuple[0], aux_tuple[1]))
116 return list_id_fixed_polarity
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def classify(self, arff_file, results_file, dict_position_instanceid):
    """
    Classify an ARFF file with the wrapped strategy and let the handler
    classifiers re-classify the instances of the handled polarity.

    @param arff_file: A string. The path to the ARFF file to be classified
    @param results_file: A string. The path where the WEKA classifications
        will be printed.
    @param dict_position_instanceid: A dictionary {position_in_arff: file_id},
        which relates the position of each instance in the ARFF DATA file
        with its corresponding text id.
    @return: The result of disambiguating, for every id, its list of
        (polarity, confidence) candidates.
    """
    list_id_classifications = self._strategy.classify(
        arff_file, results_file, dict_position_instanceid)

    list_handled_path_files = []
    dict_handler_id_polarities = {}
    for (instance_id, polarity, confidence) in list_id_classifications:
        if polarity == self._polarity_type_handled:
            # Left to the handlers; no candidate is recorded here.
            list_handled_path_files.append((instance_id, confidence))
        elif polarity != PolarityType.OTHER:
            dict_handler_id_polarities[instance_id] = [(polarity, confidence)]
        else:
            dict_handler_id_polarities[instance_id] = []

    if list_handled_path_files:
        for handler in self._list_handlers:
            handled = handler.classify(list_handled_path_files)
            for (instance_id, polarity, confidence) in handled:
                # setdefault replaces dict.has_key, which Python 3 removed.
                dict_handler_id_polarities.setdefault(
                    instance_id, []).append((polarity, confidence))

    list_handled_id_polarities = [
        (instance_id, dict_handler_id_polarities[instance_id])
        for instance_id in set(dict_handler_id_polarities)]
    return self._disambiguate_lists_id_polarities(list_handled_id_polarities)
174
175
176
177
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237