Package miopia :: Package analyzer :: Module Analyzer
[hide private]
[frames] | no frames]

Source Code for Module miopia.analyzer.Analyzer

  1  ''' 
  2  Created on 11/04/2013 
  3   
  4  @author: David Vilares 
  5  ''' 
  6  from miopia.analyzer.SemanticCategory import SemanticCategory 
  7   
  8   
class Analyzer(object):
    '''
    Analyzer is an abstract class which defines some methods for the
    L{SentimentAnalyzer}.

    The C{analyze*} entry points raise C{NotImplementedError} here; the
    remaining methods are shared helpers.
    '''
    # ROOT_WORD = "ROOT_WORD"

    # Characters treated as reserved WEKA symbols inside a word.
    _WEKA_INVALID_CHARS = ('\'', '|', '\\')

    # Words that are, as a whole, a reserved WEKA element.
    _WEKA_RESERVED_ELEMENTS = ('\'',)

    def __init__(self, parser, dictionaries, preprocessor,
                 lexical_processor):
        '''
        @param parser: An instance of L{Parser}
        @param dictionaries: An instance of L{Dictionary}
        @param preprocessor: An instance of L{PreProcessor}.
        @param lexical_processor: An instance of L{LexicalProcessor}.
        '''
        self._preprocessor = preprocessor
        self._lexical_processor = lexical_processor
        self._parser = parser
        self._dictionaries = dictionaries

    def analyze_from_conll(self, file_path, **kwargs):
        """
        @param file_path: Path to the parsed file in the CoNLL format to be
        analysed
        @raise NotImplementedError: Always; this method is abstract here.
        """
        raise NotImplementedError

    def analyze_from_plain_file(self, file_path, input_encoding='utf-8'):
        """
        @param file_path: Path to the plain file to be analysed
        @param input_encoding: The encoding of the input file
        @raise NotImplementedError: Always; this method is abstract here.
        """
        raise NotImplementedError

    def analyze_dir(self, dir_path, input_encoding='utf-8'):
        """
        It analyzes a directory of plain texts
        @param dir_path: Path to the directory of plain files to be analysed
        @param input_encoding: The encoding of the input file
        @raise NotImplementedError: Always; this method is abstract here.
        """
        raise NotImplementedError

    def analyze(self, text):
        """
        @param text: The string to be analysed. Use unicode.
        @raise NotImplementedError: Always; this method is abstract here.
        """
        raise NotImplementedError

    def _preanalyze(self, text):
        """
        It applies pre-processing, segmentation, tokenization and
        PoS-tagging steps to the string to be analysed.
        @param text: The string to be analysed
        @return: A tuple. First element is a list of tagged sentences.
        Each tagged sentence is a list of tuples (token,L{InfoTag}).
        Second element is the second value returned by the lexical
        processor's C{extract_tokens} (its meaning is not visible here).
        """
        preprocessed_text = self._preprocessor.preprocess(text)
        sentences = self._lexical_processor.extract_sentences(
            preprocessed_text)
        (tokens, lsi) = self._lexical_processor.extract_tokens(sentences)
        return self._lexical_processor.extract_tags(tokens), lsi

    def _is_weka_reserved_element(self, node):
        """
        It determines if a node is a WEKA reserved symbol

        NOTE(review): C{get_word} is not defined in this class; presumably
        a subclass provides it.

        @param node: A node of a L{SentimentDependencyGraph}
        @return: True if node word is a WEKA reserved element, False otherwise
        """
        return self.get_word(node) in self._WEKA_RESERVED_ELEMENTS

    def _contain_invalid_token(self, node):
        """
        NOTE(review): C{get_word} is not defined in this class; presumably
        a subclass provides it.

        @param node: A node of a L{SentimentDependencyGraph}
        @return: True if node word contains a reserved WEKA symbol, False
        otherwise (including when the word cannot be obtained or inspected)
        """
        try:
            word = self.get_word(node)
        except Exception:
            # Best effort: a node whose word cannot be read is reported as
            # not containing an invalid token instead of raising.
            return False
        return self._word_contain_invalid_token(word)

    def _word_contain_invalid_token(self, str_word):
        """
        @param str_word: A string. A word.
        @return: True if the word contains a reserved WEKA symbol, False
        otherwise (including when str_word is not a searchable value,
        e.g. None)
        """
        try:
            return any(symbol in str_word
                       for symbol in self._WEKA_INVALID_CHARS)
        except Exception:
            # Explicit return: a bare `False` expression here would make
            # the method fall through and yield None.
            return False

    def negation_node(self, dg, node):
        """
        @param dg: An instance of a L{SentimentDependencyGraph}
        @param node: A node of a L{SentimentDependencyGraph}
        @return: L{SemanticCategory}.NEGATION_WITHOUT if the word of node is
        'sin'; L{SemanticCategory}.NEGATION if some child of node is the word
        'no' with relation 'mod' or 'neg', or the word 'nunca' with a
        relation other than 'S'; None otherwise
        """
        # NOTE(review): this comparison is case-sensitive, whereas child
        # words below are lowercased first -- confirm that is intended.
        if dg.get_word(node) == 'sin':
            return SemanticCategory.NEGATION_WITHOUT

        for address in dg.get_deps(node):
            child = dg.get_by_address(address)
            word = dg.get_word(child).lower()
            rel = dg.get_rel(child)
            is_no = word == 'no' and rel in ('mod', 'neg')
            is_nunca = word == 'nunca' and rel != 'S'
            if is_no or is_nunca:
                return SemanticCategory.NEGATION
        return None

    def get_semantic_category(self, dg, node):
        """
        Provides information to call the correct visit function.

        Checks are applied in this order: negation, intensifier, emoticon,
        artificial node, lexical category.

        @param dg: An instance of a L{SentimentDependencyGraph}
        @param node: A node of a L{SentimentDependencyGraph}
        @return: The "semantic" category of the node as a
        L{SemanticCategory} value: the negation type if L{negation_node}
        finds one, INTENSIFIER, EMOTICON, SUBORDINATE_ADVERSATIVE for an
        artificial 'art_adversative' node, NOUN/ADJECTIVE/ADVERB/VERB
        for lexical categories 'n'/'a'/'r'/'v', and OTHER otherwise.
        @raise KeyError: If node is artificial and its lexical category is
        not 'art_adversative'.
        """
        # Checking if is a negation
        type_neg = self.negation_node(dg, node)
        if type_neg is not None:
            return type_neg
        if dg.is_intensifier(node, self._dictionaries):
            return SemanticCategory.INTENSIFIER
        if dg.is_emoticon(node):
            return SemanticCategory.EMOTICON
        if dg.is_artificial_node(node):
            switch = {
                "art_adversative": SemanticCategory.SUBORDINATE_ADVERSATIVE,
            }
            return switch[dg.get_lexical_category(node)]

        switch_lexical_pos = {
            "n": SemanticCategory.NOUN,
            "a": SemanticCategory.ADJECTIVE,
            "r": SemanticCategory.ADVERB,
            "v": SemanticCategory.VERB,
        }
        try:
            return switch_lexical_pos[dg.get_lexical_category(node)]
        except Exception:
            # Unknown category, or the category could not be obtained:
            # fall back to OTHER rather than failing.
            return SemanticCategory.OTHER
156