1 '''
2 Created on 25/02/2014
3
4 @author: david.vilares
5 '''
6
7 from miopia.analyzer.counter.SintacticCounter import SintacticCounter
8 from miopia.util.TextInfo import TextInfo
10 '''
11 @warning: Not working properly
12 '''
13
14
def __init__(self, ftc, stop_dependencies=None,
             dict_subgraph_classifiers_info=None):
    '''
    Constructor

    @param ftc: Passed unchanged to the parent class constructor.
    @param stop_dependencies: Collection kept on the instance as
        C{_stop_dependencies}. When omitted (or None) every instance gets
        its own empty set, so instances never share one default object.
    @param dict_subgraph_classifiers_info: Mapping kept on the instance as
        C{_dict_subgraph_classifiers_info}; may be None.
    '''
    super(DependencyTypeCounter, self).__init__(ftc)
    # A default of set([]) in the signature would be evaluated once and
    # shared by all instances; build a fresh set per instance instead.
    if stop_dependencies is None:
        stop_dependencies = set()
    self._stop_dependencies = stop_dependencies
    self._dict_subgraph_classifiers_info = dict_subgraph_classifiers_info
23
24
25 - def _count(self, list_text_info):
26 dict_features = {}
27 list_subgraphs_text_info = []
28
29
30
31 for text_info in list_text_info:
32 textid = text_info.get_textid()
33 text_graphs = text_info.get_dependency_graphs()
34 list_subgraphs_text_info = []
35 for graph,address in text_graphs:
36 dict_features_graph = self._count_graph(textid, graph, address)
37
38 for _,address,name in dict_features_graph:
39 subrange = self._get_subtext_range(graph, address)
40 subrange.sort(key= lambda t: t[0])
41 subtext = ' '.join([word for address,word in subrange])
42 new_text_info = TextInfo(textid+"_"+str(address),subtext,[(graph,address)],
43 text_info.get_metadata(),'?')
44 list_subgraphs_text_info.append(new_text_info)
45
46 subgraph_adapter = self._dict_subgraph_classifiers_info['DEFAULT'].get_adapter()
47 subgraph_classifier = self._dict_subgraph_classifiers_info['DEFAULT'].get_classifier()
48 subgraph_arff_header = self._dict_subgraph_classifiers_info['DEFAULT'].get_arff_header()
49 subgraph_dest_test_arff = self._dict_subgraph_classifiers_info['DEFAULT'].get_arff_path()
50 dict_position_instanceid = subgraph_adapter.to_arff(list_subgraphs_text_info,
51 subgraph_dest_test_arff,
52 subgraph_arff_header, True)
53 subgraph_results_file = self._dict_subgraph_classifiers_info['DEFAULT'].get_results_path()
54
55 list_id_category = subgraph_classifier.classify(subgraph_dest_test_arff, subgraph_results_file,
56 dict_position_instanceid)
57 print textid, text_info.get_category(),list_id_category
58 print [t.get_text() for t in list_subgraphs_text_info]
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73 - def _get_subtext_range(self,dependency_graph,address):
74 node = dependency_graph.get_by_address(address)
75 form = dependency_graph.get_word(node)
76 if dependency_graph.is_leaf(node):
77 return [(address,form)]
78 else:
79 raw_nodes = [(address, form)]
80 children = dependency_graph.get_deps(node)
81 for child in children:
82 raw_nodes.extend(self._get_subtext_range(dependency_graph,child))
83 return raw_nodes
84
85
86
87
88
89
90
91
92
93
94
95
96
97
99
100
101 node = dg.get_by_address(address)
102 if dg.is_leaf(node):
103 return {self._id_of_feature(textid, address, dg.get_rel(node)):1}
104 else:
105
106 dict_features = {self._id_of_feature(textid, address,dg.get_rel(node)):1}
107 children = dg.get_deps(node)
108 for child in children:
109 dict_child = self._count_graph(textid,dg, child)
110 dict_features = dict((n, dict_features.get(n,0)+dict_child.get(n,0))
111 for n in set(dict_features)| set(dict_child))
112 return (dict_features)
113