Package miopia :: Package parser :: Module SentimentDependencyGraph
[hide private]
[frames] | no frames]

Source Code for Module miopia.parser.SentimentDependencyGraph

  1  ''' 
  2  Created on 26/12/2013 
  3   
  4  @author: David Vilares 
  5  ''' 
  6  from nltk.parse.dependencygraph import DependencyGraph 
  7   
class SentimentDependencyGraphNodeKeys(object):
    """
    Names of the entries that a node of a L{SentimentDependencyGraph}
    may hold. Each constant is the literal dictionary key used to index
    a node.
    """

    # Lower-case keys. NOTE(review): presumably these are the entries the
    # underlying NLTK dependency graph already stores for every node --
    # confirm against the installed NLTK version.
    ADDRESS = "address"
    WORD = "word"
    TAG = "tag"
    HEAD = "head"
    REL = "rel"
    DEPS = "deps"

    # Upper-case keys for the sentiment-related annotations.
    SEMANTIC_ORIENTATION = "SEMANTIC_ORIENTATION"
    INTENSIFICATION = "INTENSIFICATION"
    SUBJECTIVITY = "SUBJECTIVITY"
    POSITIVE_WORDS = "POSITIVE_WORDS"
    NEGATIVE_WORDS = "NEGATIVE_WORDS"
    NUMBER_OF_INTENSIFIERS = "NUMBER_OF_INTENSIFIERS"
    LENGTH_TEXT = "LENGTH_TEXT"
    NUMBER_OF_WORDS = "NUMBER_OF_WORDS"
    LINGUISTIC_INFO = "LINGUISTIC_INFO"
28
class SentimentJSONKeys(object):
    """
    Field names of the JSON-like dictionaries produced when a
    L{SentimentDependencyGraph} is exported.
    """

    # Identity of the node and its place in the tree.
    ADDRESS = "address"
    WORD = "word"
    POSTAG = "postag"
    DEPENDENCY_TYPE = "dependency_type"
    CHILDREN = "children"

    # Sentiment-related fields.
    SEMANTIC_ORIENTATION = "so"
    IS_NEGATION = "is_negation"
    IS_INTENSIFIER = "is_intensifier"
41
class SentimentDependencyGraph(DependencyGraph):
    """
    A L{nltk.parse.dependencygraph.DependencyGraph} whose nodes can
    additionally carry sentiment information.

    @cvar ROOT_WORD: Placeholder reported as the word of the root node.
    """

    ROOT_WORD = "ROOT_WORD"
def __init__(self, tree_str=None):
    """
    Build the graph by delegating to the parent constructor.

    @param tree_str: Forwarded unchanged to the parent class. See
        L{nltk.parse.dependencygraph.DependencyGraph}
    """
    parent = super(SentimentDependencyGraph, self)
    parent.__init__(tree_str)
57 58
def contains_sentiment_info(self, address):
    """
    Tell whether the node stored at C{address} has a semantic
    orientation entry.

    @param address: Identifier handed to C{get_by_address}
    @return: True if the node can be indexed with
        L{SentimentDependencyGraphNodeKeys.SEMANTIC_ORIENTATION},
        False otherwise
    """
    try:
        node = self.get_by_address(address)
        node[SentimentDependencyGraphNodeKeys.SEMANTIC_ORIENTATION]
    except (KeyError, IndexError, TypeError):
        # KeyError: the node has no semantic orientation entry.
        # TypeError / IndexError: the lookup did not yield a mapping
        # that can be indexed by key -- presumably an unknown address;
        # verify against the installed NLTK version.
        # Anything else (KeyboardInterrupt, SystemExit, programming
        # errors) is deliberately no longer swallowed.
        return False
    return True
65
66 - def set_linguistic_info(self,node,linguistic_info):
68 69
70 - def get_linguistic_info(self,node):
72
def get_rel(self, node):
    """
    Read the dependency relation stored in a node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: The dependency relation between the node and its head
    """
    relation_key = SentimentDependencyGraphNodeKeys.REL
    return node[relation_key]
79 80
def get_deps(self, node):
    """
    Read the children stored in a node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: The identifiers of the children of the node
    """
    children_key = SentimentDependencyGraphNodeKeys.DEPS
    return node[children_key]
87 88
def _get_adversative_id(self, node):
    """
    Extract the adversative identifier encoded in the tag of a node.

    @precondition: The tag of the node must follow the pattern used for
        adversative tags: tag:additional_information@id
    @param node: The artificial adversative node
    @return: An integer, the C{id} part of the tag
    """
    full_tag = node[SentimentDependencyGraphNodeKeys.TAG]
    # Second ':'-separated field, i.e. "additional_information@id".
    info_and_id = full_tag.split(":")[1]
    # Second '@'-separated field of that, i.e. "id".
    raw_id = info_and_id.split("@")[1]
    return int(raw_id)
97
def get_tag(self, node):
    """
    Read the fine-grained PoS-tag of a node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: The fine PoS-tag of the node, i.e. the stored tag up to
        (and excluding) its first '@'
    """
    stored_tag = node[SentimentDependencyGraphNodeKeys.TAG]
    fine_tag, _separator, _suffix = stored_tag.partition('@')
    return fine_tag
104 105
def get_ctag(self, node):
    """
    Read the coarse-grained PoS-tag of a node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: The coarse PoS-tag of the node, i.e. the stored tag up to
        (and excluding) its first ':'
    """
    stored_tag = node[SentimentDependencyGraphNodeKeys.TAG]
    coarse_tag, _separator, _details = stored_tag.partition(':')
    return coarse_tag
112 113
114 - def get_head(self,node):
116
def get_address(self, node):
    """
    Read the identifier of a node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: The position of the node in the sentence. Zero is the
        root node
    """
    address_key = SentimentDependencyGraphNodeKeys.ADDRESS
    return node[address_key]
123 124
def get_word(self, node):
    """
    Read the word of a node.

    @param node: A node of the graph
    @return: The word stored in the node, or C{ROOT_WORD} when the node
        is the root
    """
    if not self.is_root_node(node):
        return node[SentimentDependencyGraphNodeKeys.WORD]
    # The root is reported with the class-level placeholder instead of
    # whatever its word entry holds.
    return self.ROOT_WORD
133
def get_lexical_category(self, node):
    """
    Read the lexical category of a node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: The lexical category of the node, i.e. the fine tag up to
        (and excluding) its first ':'
    """
    fine_tag = self.get_tag(node)
    category, _separator, _details = fine_tag.partition(':')
    return category
140
def is_leaf(self, node):
    """
    Tell whether a node has no children.

    @param node: A node of a DependencyGraph
    @return: True if is a leaf node, False otherwise
    """
    # Truthiness rather than a comparison with [] so that any empty
    # container of children (list, tuple, dict, ...) counts as "no
    # children", not only an empty list.
    return not self.get_deps(node)
147
def is_root_node(self, node):
    """
    Tell whether a node is the root of the graph.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: True if the address of the node is zero, False otherwise
    """
    node_address = self.get_address(node)
    return node_address == 0
154
def is_negation_node(self, node):
    """
    Tell whether a node is a negation.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: True if it's a negation node, False otherwise.
    """
    token = self.get_word(node).lower()
    relation = self.get_rel(node)

    # 'sin' negates regardless of its dependency relation.
    if token == 'sin':
        return True
    # 'no' only negates under the 'mod' or 'neg' relations.
    if token == 'no':
        return relation in ('mod', 'neg')
    # 'nunca' negates under every relation except 'S'.
    if token == 'nunca':
        return relation != 'S'
    return False
166 167
def is_intensifier(self, node, dictionary):
    """
    Tell whether a node is an intensifier.

    A node qualifies when its dependency relation and its lexical
    category are both among the accepted ones and the dictionary
    recognises its lemma as an intensifier term.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @param dictionary: An instance of L{Dictionary}
    @return: True if word is an intensifier, false otherwise
    """
    intensifier_rel = ['spec', 'espec', 'cc', 'sadv', 'f']
    intensifier_categ = ['r', 'f']

    # Cheap structural checks first: most nodes are ruled out here, so
    # the dictionary is only queried for plausible candidates.
    if self.get_rel(node) not in intensifier_rel:
        return False
    # The category is computed once and reused for the lemma lookup.
    lexical_category = self.get_lexical_category(node)
    if lexical_category not in intensifier_categ:
        return False

    lemma = dictionary.get_lemma(lexical_category, self.get_word(node))
    return dictionary.is_intensifier_term(lemma)
181
def is_emoticon(self, node):
    """
    Tell whether a node is an emoticon.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: True if the dependency relation of the node is
        'art_rel_emoticon', False otherwise
    """
    relation = self.get_rel(node)
    return relation == 'art_rel_emoticon'
188 189
def is_artificial_node(self, node):
    """
    Tell whether a node is an artificial adversative node.

    @param node: A node of a L{nltk.parse.dependencygraph.DependencyGraph}
    @return: True if the dependency relation of the node is
        'art_rel_adversative' (the mark of nodes created artificially
        by L{src.model.parser.Parser}), False otherwise
    """
    relation = self.get_rel(node)
    return relation == 'art_rel_adversative'
196 197 198 # def get_subgraph(self,new_root_node): 199 # difference = self.get_address(new_root_node) 200 # 201 # subgraph = SentimentDependencyGraph() 202 # return SentimentDependencyGraph() 203
204 - def _nodes_in_graph(self,node):
205 206 list_nodes = [] 207 if self.is_leaf(node): 208 return [node] 209 else: 210 if not self.is_root_node(node): 211 list_nodes = [node] 212 list_children_node = [self.get_by_address(address) 213 for address in self.get_deps(node)] 214 for child_node in list_children_node: 215 list_nodes.extend(self._nodes_in_graph(child_node)) 216 return list_nodes 217 218
def graph_to_string(self, node):
    """
    Rebuild the raw phrase covered by the subtree of a node.

    @param node: A node of a L{SentimentDependencyGraph}
    @return: A string. The words of the subtree that starts in the
        node, ordered by address and separated by single spaces.
    """
    subtree_nodes = self._nodes_in_graph(node)
    in_sentence_order = sorted(subtree_nodes,
                               key=lambda item: item['address'])
    words = [self.get_word(item) for item in in_sentence_order]
    return ' '.join(words)
226 227
def number_of_nodes(self, node):
    """
    Count the nodes of the subtree that starts in a node.

    @param node: A node of a L{SentimentDependencyGraph}
    @return: An integer. The number of nodes starting in node. A root
        node that has children is not counted; a node without children
        counts as one.
    """
    if self.is_leaf(node):
        return 1

    # The root only acts as an anchor, so it does not add to the count.
    own_count = 0 if self.is_root_node(node) else 1
    # Resolve every child before descending into any of them.
    children = [self.get_by_address(child_address)
                for child_address in self.get_deps(node)]
    return own_count + sum(self.number_of_nodes(child)
                           for child in children)
244
def level(self, address, level=1):
    """
    Compute the depth of a node by walking up its chain of heads.

    @param address: An integer representing the identifier of a node
        of the graph
    @param level: Initial level of the node, before recursive calls of
        level function
    @return: The level of a node in a dependency graph. A node whose
        head is the root, or whose head cannot be looked up, is at the
        level received as argument.
    """
    try:
        head_node = self.get_by_address(self._hd(address))
    except (KeyError, IndexError, TypeError):
        # The head could not be looked up (for instance a node without
        # a head entry), so the walk stops here. Only lookup failures
        # are treated this way; KeyboardInterrupt, SystemExit and
        # programming errors are no longer silently swallowed.
        return level
    if self.is_root_node(head_node):
        return level
    return self.level(self.get_address(head_node), level + 1)
260 261 262
def dg_to_json(self, node_address, dictionary):
    """
    Export the subtree that starts in a node as nested dictionaries.

    @param node_address: An integer. The identifier of a node of a
        L{SentimentDependencyGraph}
    @param dictionary: An instance of L{Dictionary}
    @return: A dictionary keyed by the constants of
        L{SentimentJSONKeys}. Its children entry holds one dictionary
        of the same shape per child (an empty list for a leaf), and its
        semantic orientation entry is None when the node has none.
    """
    node = self.get_by_address(node_address)
    orientation_key = SentimentDependencyGraphNodeKeys.SEMANTIC_ORIENTATION
    children_json = []

    json_node = {
        SentimentJSONKeys.ADDRESS: self.get_address(node),
        SentimentJSONKeys.WORD: self.get_word(node),
        SentimentJSONKeys.POSTAG: self.get_tag(node),
        SentimentJSONKeys.DEPENDENCY_TYPE: self.get_rel(node),
        SentimentJSONKeys.CHILDREN: children_json,
        SentimentJSONKeys.IS_NEGATION: self.is_negation_node(node),
        SentimentJSONKeys.IS_INTENSIFIER: self.is_intensifier(node,
                                                              dictionary),
        SentimentJSONKeys.SEMANTIC_ORIENTATION: (
            node[orientation_key] if orientation_key in node else None),
    }

    if not self.is_leaf(node):
        # children_json is the very list stored in json_node, so
        # appending to it fills the children entry in place.
        for child_address in self.get_deps(node):
            children_json.append(self.dg_to_json(child_address, dictionary))
    return json_node
289