
Source Code for Module miopia.analyzer.counter.DependencyTripletsCounter

'''
Created on 25/02/2014

@author: david.vilares
'''

from miopia.analyzer.counter.SintacticCounter import SintacticCounter
from miopia.adapter.Feature import FeatureLevelBackOff
from miopia.adapter.Feature import FeatureTypeConfiguration
from miopia.util.TokenInfo import TokenInfo
import itertools

class DependencyTripletsCounter(SintacticCounter):
    '''
    This Counter is responsible for counting the dependency triplets present
    in a L{SentimentDependencyGraph}.
    '''

    def __init__(self, ftc, back_off, stop_words=set()):
        '''
        @param ftc: An instance of L{FeatureTypeConfiguration}
        @param back_off: An instance of L{BackOff}
        @param stop_words: A set of words that shouldn't be taken into account
        '''
        super(DependencyTripletsCounter, self).__init__(ftc)
        self._stop_words = stop_words
        self._back_off = back_off

    def _count_graph(self, textid, dg, address):
        """
        @param textid: A string. The identifier of the text.
        @param dg: An instance of L{SentimentDependencyGraph}
        @param address: An integer. The node identifier in the graph.
        """
        node = dg.get_by_address(address)
        if dg.is_leaf(node):
            return {}
        else:
            dict_dep = {}
            children = dg.get_deps(node)
            for child in children:
                child_node = dg.get_by_address(child)

                # Build the head and dependent token representations and apply
                # the configured back-off generalisation to each of them.
                tih = TokenInfo(dg.get_word(node), None,
                                dg.get_ctag(node), dg.get_tag(node))
                tid = TokenInfo(dg.get_word(child_node), None,
                                dg.get_ctag(child_node), dg.get_tag(child_node))
                head_value = self._back_off.back_off(tih, self._ftc.get_back_off_head())
                dependent_value = self._back_off.back_off(tid, self._ftc.get_back_off_dependent())

                # The back-off may return a single value or a list of values.
                if not isinstance(head_value, list):
                    head_value = [head_value]
                if not isinstance(dependent_value, list):
                    dependent_value = [dependent_value]

                # Count every head-dependent combination, skipping stop words.
                list_pairs_head_dependent = itertools.product(head_value,
                                                              dependent_value)
                for pair in list_pairs_head_dependent:
                    if pair[0] in self._stop_words or pair[1] in self._stop_words:
                        continue

                    if self._ftc.get_add_dependency_type() is True:
                        dependency_triplet = self._id_of_feature(textid, address,
                                                                 pair[0] + "-" + dg.get_rel(child_node) + "-" + pair[1])
                    else:
                        dependency_triplet = self._id_of_feature(textid, address,
                                                                 pair[0] + "-" + pair[1])

                    if dependency_triplet in dict_dep:
                        dict_dep[dependency_triplet] += 1.
                    else:
                        dict_dep[dependency_triplet] = 1.

                # Recurse into the child and merge its counts into the running total.
                dict_child = self._count_graph(textid, dg, child)
                dict_dep = dict((n, dict_dep.get(n, 0) + dict_child.get(n, 0))
                                for n in set(dict_dep) | set(dict_child))
            return dict_dep
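
As a rough, self-contained illustration of the counting scheme implemented above, the sketch below counts head-relation-dependent triplets over a toy dictionary-based dependency tree. It deliberately leaves out the miopia-specific pieces (the L{SentimentDependencyGraph} API, the L{BackOff} generalisation of heads and dependents, and the _id_of_feature identifiers); TOY_GRAPH and count_triplets are hypothetical names introduced only for this example.

from collections import Counter

# Toy dependency tree for "the film delighted critics":
# address -> (word, relation to its head, child addresses).
# Address 0 is the artificial root node.
TOY_GRAPH = {
    0: ("ROOT", None, [3]),
    1: ("the", "det", []),
    2: ("film", "nsubj", [1]),
    3: ("delighted", "root", [2, 4]),
    4: ("critics", "dobj", []),
}


def count_triplets(graph, address, add_dependency_type=True, stop_words=frozenset()):
    """Recursively count head-relation-dependent triplets below `address`."""
    word, _, children = graph[address]
    counts = Counter()
    for child in children:
        child_word, rel, _ = graph[child]
        if word in stop_words or child_word in stop_words:
            continue
        if add_dependency_type:
            counts[word + "-" + rel + "-" + child_word] += 1
        else:
            counts[word + "-" + child_word] += 1
        # Merge the counts of the child's subtree, mirroring the dict union
        # that _count_graph performs at the end of its loop.
        counts.update(count_triplets(graph, child, add_dependency_type, stop_words))
    return counts


print(count_triplets(TOY_GRAPH, 0))
# Each of the four triplets (ROOT-root-delighted, delighted-nsubj-film,
# delighted-dobj-critics, film-det-the) is counted once.

The recursion follows the same shape as _count_graph: every edge of the tree contributes one triplet (or one head-dependent pair when the relation label is omitted), and the counts of each subtree are merged into the dictionary of its parent.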