Package miopia :: Package classifier :: Module SimpleClassifier
[hide private]
[frames] | no frames]

Source Code for Module miopia.classifier.SimpleClassifier

  1  ''' 
  2  Created on 29/01/2013 
  3   
  4  @author: David Vilares Calvo 
  5  ''' 
  6   
  7  from miopia.classifier.ClassifierI import ClassifierI 
  8  from miopia.classifier.PolarityType import PolarityType 
  9  from miopia.classifier.WekaClassificationStrategy import WekaClassificationStrategy 
 10  from miopia.util.exceptions.InvalidStrategyException import InvalidStrategyException 
 11   
 12  from miopia.analyzer.SentimentAnalyzer import SentimentAnalyzer 
 13  import time 
class SimpleClassifier(ClassifierI):
    '''
    A wrapper for the L{ClassifierI}.
    @todo: Only wraps L{WekaClassificationStrategy} at the moment
    '''
    # The maximum number of files to classify in one call to WEKA
    MAX_JOINT_FILES = 30000

    def __init__(self, classification_strategy,
                 list_handlers=None, polarity_type_handled=None):
        """
        @param classification_strategy: An instance of L{WekaClassificationStrategy}
        @param list_handlers: A list with the L{SimpleClassifier} objects which act as
        handler classifiers. None or an empty list if there is no handler classifier
        @param polarity_type_handled: A value of L{PolarityType}. None if there is no handler
        """
        # TODO: Test matching between Analyzer and ClassificationStrategy
        self._strategy = classification_strategy
        # A fresh list per instance: a literal [] default would be shared
        # by every classifier created without handlers.
        self._list_handlers = [] if list_handlers is None else list_handlers
        self._polarity_type_handled = polarity_type_handled

    def _disambiguate_lists_id_polarities(self, list_id_polarities):
        """
        Reduces the candidate polarities collected for each text to a single one.
        @param list_id_polarities: A list of tuples (id, list_polarities), where
        list_polarities is a list of tuples (L{PolarityType}, confidence)
        @return: A list of tuples (id, L{PolarityType}, confidence)
        """
        def rules(list_polarities_confidence):
            """
            Chooses one polarity among the candidates of a single text.
            @param list_polarities_confidence: A list of tuples (L{PolarityType}, confidence)
            @return: A tuple (L{PolarityType}, confidence)
            """
            # If a polarity is repeated, its last confidence is the one kept.
            dict_polarities_confidence = dict(list_polarities_confidence)
            set_polarities = set(dict_polarities_confidence)

            if not set_polarities:
                return PolarityType.OTHER, 0.
            if len(set_polarities) == 1:
                only_polarity = next(iter(set_polarities))
                return only_polarity, dict_polarities_confidence[only_polarity]

            has_positive = (PolarityType.POSITIVE in set_polarities or
                            PolarityType.STRONG_POSITIVE in set_polarities)
            has_negative = (PolarityType.NEGATIVE in set_polarities or
                            PolarityType.STRONG_NEGATIVE in set_polarities)
            if has_positive and has_negative:
                # Contradictory candidates: answer NEUTRAL with the mean
                # confidence of all of them. The set is not modified while
                # it is traversed, which would raise a RuntimeError.
                confidences = list(dict_polarities_confidence.values())
                return PolarityType.NEUTRAL, sum(confidences, 0.) / len(confidences)

            # Only one side (positive or negative) can be present from here on.
            for polarity in (PolarityType.POSITIVE, PolarityType.STRONG_POSITIVE,
                             PolarityType.NEGATIVE, PolarityType.STRONG_NEGATIVE):
                if polarity in set_polarities:
                    return polarity, dict_polarities_confidence[polarity]

            # No sentiment-bearing candidate. Always return a pair, because the
            # caller unpacks (polarity, confidence); 0. when NONE was not a candidate.
            return (PolarityType.NONE,
                    dict_polarities_confidence.get(PolarityType.NONE, 0.))

        list_id_fixed_polarity = []
        for (text_id, list_polarities) in list_id_polarities:
            polarity, confidence = rules(list_polarities)
            list_id_fixed_polarity.append((text_id, polarity, confidence))
        return list_id_fixed_polarity

    def classify(self, arff_file, results_file, dict_position_instanceid):
        """
        @param arff_file: A string. The path to the ARFF file to be classified
        @param results_file: A string. The path where the WEKA classifications will be printed.
        @param dict_position_instanceid: A dictionary {position_in_arff: file_id}, which
        relates the position of each instance in the ARFF DATA file with its corresponding text id.
        @return: A list of tuples (id, L{PolarityType}, confidence)
        """
        # TODO: Also classify the information provided by the SentimentAnalyzer
        list_id_classifications = self._strategy.classify(arff_file, results_file,
                                                          dict_position_instanceid)

        list_handled_path_files = []
        dict_handler_id_polarities = {}
        for (text_id, polarity, confidence) in list_id_classifications:
            if polarity == self._polarity_type_handled:
                # Delegated to the handlers; it only reaches the result
                # if some handler returns a classification for it.
                list_handled_path_files.append((text_id, confidence))
            elif polarity != PolarityType.OTHER:
                dict_handler_id_polarities[text_id] = [(polarity, confidence)]
            else:
                dict_handler_id_polarities[text_id] = []

        if list_handled_path_files:
            for handler in self._list_handlers:
                # NOTE(review): handlers are called with a single argument, a list of
                # (id, confidence) tuples, which does not match the three-argument
                # classify() of this class -- confirm the intended handler API.
                list_aux_id_polarities = handler.classify(list_handled_path_files)
                for (text_id, polarity, confidence) in list_aux_id_polarities:
                    # setdefault replaces dict.has_key(), which Python 3 removed.
                    dict_handler_id_polarities.setdefault(text_id, []).append(
                        (polarity, confidence))

        list_handled_id_polarities = [(text_id, dict_handler_id_polarities[text_id])
                                      for text_id in set(dict_handler_id_polarities)]
        return self._disambiguate_lists_id_polarities(list_handled_id_polarities)

    def to_key_value_format(self, list_id_category, dest_file):
        """
        It transforms the output provided by the classify method into a key-value format (QREL format).
        One line "id<TAB>category" is written per tuple; the confidence is discarded.
        @param list_id_category: A list of tuples (fileId, category, confidence)
        @param dest_file: A string. The path of the file to be written (it is overwritten)
        """
        # The with block closes the file even if a write fails.
        with open(dest_file, "w") as f:
            for text_id, category, _ in list_id_category:
                # %s formatting also accepts ids that are not strings.
                f.write("%s\t%s\n" % (text_id, category))
189 190 191 192 # def classify(self, list_path_files): 193 # """ 194 # @precondition: path_files must refer ConLL 2006 files 195 # @param list_path_files: A list to the path of the files to analyze 196 # @return: A list of tuples (path_to_file, L{PolarityType}, confidence) 197 # """ 198 # 199 # 200 # list_linguistic_info = [] 201 # list_id_classifications = [] 202 # i = 0 203 # dictionary_adapted_features = (self._analyzer.get_dictionary_adapted_features() 204 # if self.has_linguistic_analyzer() else None) 205 # 206 # ini = time.time() 207 # 208 # list_file_id_linguistic_info = self._analyzer.analyze_from_conll_list(list_path_files) 209 # 210 # list_id_classifications.extend(self._classification_strategy.classify("a",list_path_files)) 211 # 212 # list_linguistic_info = [] 213 # list_handled_path_files, list_aux_id_polarities = [],[] 214 # dict_handler_id_polarities = {} 215 # for (id,polarity,confidence) in list_id_classifications: 216 # if polarity == self._polarity_type_handled: 217 # list_handled_path_files.append((id,confidence)) 218 # #else: 219 # elif polarity != PolarityType.OTHER: #or (polarity == PolarityType.OTHER and self._polarity_type_handled == None): 220 # #print id,polarity 221 # dict_handler_id_polarities[id] = [(polarity,confidence)] 222 # else: 223 # dict_handler_id_polarities[id] =[] 224 # 225 # 226 # if list_handled_path_files !=[]: 227 # for handler in self._list_handlers: 228 # list_aux_id_polarities = handler.classify(list_handled_path_files) 229 # for (id,polarity,confidence) in list_aux_id_polarities: 230 # if dict_handler_id_polarities.has_key(id): 231 # dict_handler_id_polarities[id].append((polarity,confidence)) 232 # else: 233 # dict_handler_id_polarities[id] = [(polarity,confidence)] 234 # list_handled_id_polarities = [(id, dict_handler_id_polarities[id]) 235 # for id in set(dict_handler_id_polarities)] 236 # return self._disambiguate_lists_id_polarities(list_handled_id_polarities) 237