1 '''
2 Created on 25/02/2014
3
4 @author: david.vilares
5 '''
6
7 from miopia.analyzer.counter.SintacticCounter import SintacticCounter
8 from miopia.adapter.Feature import FeatureLevelBackOff
9 from miopia.adapter.Feature import FeatureTypeConfiguration
10 from miopia.util.TokenInfo import TokenInfo
11 import itertools
12
13
15 '''
16 This Counter is responsible for counting dependency triplets present in a
17 L{SentimentDependencyGraph}
18 '''
19
20
def __init__(self, ftc, back_off, stop_words=None):
    '''
    Build the counter, keeping the back-off strategy and the stop words that
    are used later while counting.

    @param ftc: An instance of L{FeatureTypeConfiguration}. It is forwarded
    to the parent constructor.
    @param back_off: An instance of L{BackOff}. Stored unchanged.
    @param stop_words: A set of words that shouldn't be taken into account.
    When it is omitted (or None) a new empty set is created for this
    instance.
    '''
    super(DependencyTripletsCounter, self).__init__(ftc)
    # A fresh set per instance: a mutable default in the signature would be
    # one single object shared by every counter built without stop_words.
    self._stop_words = set() if stop_words is None else stop_words
    self._back_off = back_off
30
31
def _count_graph(self, textid, dg, address):
    """
    Count the head-dependent features found at the node C{address} of C{dg}
    and, recursively, at each of its dependents.

    @param textid: A string. The identifier of the text.
    @param dg: An instance of L{SentimentDependencyGraph}
    @param address: An integer. The node identifier in the graph.
    @return: A dictionary that maps each feature identifier, as built by
    C{self._id_of_feature}, to the number of times it was counted. It is
    empty when the node is a leaf.
    """
    # NOTE(review): the 'def' line of this method is missing from the reviewed
    # text. Its name and parameters were restored from the recursive call
    # below and from the @param list; confirm against the original file.
    node = dg.get_by_address(address)
    if dg.is_leaf(node):
        return {}

    # The head is the same for every dependent of this node, so its back-off
    # value is computed once instead of once per child.
    head_info = TokenInfo(dg.get_word(node), None,
                          dg.get_ctag(node), dg.get_tag(node))
    head_values = self._back_off.back_off(head_info,
                                          self._ftc.get_back_off_head())
    # back_off may give back a single value or a list of them; both cases
    # are handled as a list.
    if not isinstance(head_values, list):
        head_values = [head_values]

    dict_dep = {}
    for child in dg.get_deps(node):
        child_node = dg.get_by_address(child)
        dependent_info = TokenInfo(dg.get_word(child_node), None,
                                   dg.get_ctag(child_node),
                                   dg.get_tag(child_node))
        dependent_values = self._back_off.back_off(
            dependent_info, self._ftc.get_back_off_dependent())
        if not isinstance(dependent_values, list):
            dependent_values = [dependent_values]

        for head, dependent in itertools.product(head_values,
                                                 dependent_values):
            # A pair is discarded as soon as one of its sides is a stop word.
            if head in self._stop_words or dependent in self._stop_words:
                continue

            # Identity test kept on purpose: only the literal True adds the
            # dependency relation to the feature name.
            if self._ftc.get_add_dependency_type() is True:
                feature_name = (head + "-" + dg.get_rel(child_node)
                                + "-" + dependent)
            else:
                feature_name = head + "-" + dependent
            dependency_triplet = self._id_of_feature(textid, address,
                                                     feature_name)

            # dict.get instead of has_key, which no longer exists in Python 3.
            dict_dep[dependency_triplet] = dict_dep.get(dependency_triplet, 0.) + 1.

        # Add the counts of the subtree rooted at this child to the running
        # totals, in place, rather than rebuilding the whole dictionary.
        dict_child = self._count_graph(textid, dg, child)
        for feature, count in dict_child.items():
            dict_dep[feature] = dict_dep.get(feature, 0) + count
    return dict_dep
78