Package miopia :: Package analyzer :: Package counter :: Module DependencyTypeCounter
[hide private]
[frames | no frames]

Source Code for Module miopia.analyzer.counter.DependencyTypeCounter

  1  ''' 
  2  Created on 25/02/2014 
  3   
  4  @author: david.vilares 
  5  ''' 
  6   
  7  from miopia.analyzer.counter.SintacticCounter import SintacticCounter 
  8  from miopia.util.TextInfo import TextInfo 
class DependencyTypeCounter(SintacticCounter):
    '''
    Counter that walks the dependency graphs of each text, rebuilds the
    subtext spanned by every node of those graphs and sends the resulting
    subtexts to the 'DEFAULT' subgraph classifier.

    @warning: Not working properly
    '''

    def __init__(self, ftc, stop_dependencies=None,
                 dict_subgraph_classifiers_info=None):
        '''
        Constructor

        @param ftc: Forwarded unchanged to the SintacticCounter constructor.
        @param stop_dependencies: Collection stored as
            C{self._stop_dependencies}. When omitted, a fresh empty set is
            created for this instance. No method of this class reads it.
        @param dict_subgraph_classifiers_info: Mapping stored as
            C{self._dict_subgraph_classifiers_info}. L{_count} reads its
            'DEFAULT' entry and calls get_adapter(), get_classifier(),
            get_arff_header(), get_arff_path() and get_results_path() on it.
        '''
        super(DependencyTypeCounter, self).__init__(ftc)
        # A default of set([]) in the signature would be one single object
        # shared by every instance built without this argument.
        if stop_dependencies is None:
            stop_dependencies = set()
        self._stop_dependencies = stop_dependencies
        self._dict_subgraph_classifiers_info = dict_subgraph_classifiers_info

    def _count(self, list_text_info):
        '''
        For every text, build one TextInfo per node of its dependency
        graphs, classify those TextInfo objects with the 'DEFAULT' subgraph
        classifier and print the outcome.

        @param list_text_info: Iterable of objects providing get_textid(),
            get_dependency_graphs() (an iterable of (graph, address) pairs),
            get_metadata() and get_category().
        @return: None. Aggregating the per-graph counts into a single
            feature dictionary is not implemented.
        '''
        for text_info in list_text_info:
            textid = text_info.get_textid()
            list_subgraphs_text_info = []

            for graph, root_address in text_info.get_dependency_graphs():
                dict_features_graph = self._count_graph(textid, graph,
                                                        root_address)

                # Each key is unpacked as a 3-tuple whose second item is a
                # node address. That address gets its own name so neither
                # the enclosing loop nor the comprehension below can
                # overwrite it (Python 2 comprehensions leak their loop
                # variable into the enclosing scope).
                for _, node_address, _rel in dict_features_graph:
                    subrange = self._get_subtext_range(graph, node_address)
                    subrange.sort(key=lambda pair: pair[0])
                    subtext = ' '.join([word for _addr, word in subrange])
                    list_subgraphs_text_info.append(
                        TextInfo(textid + "_" + str(node_address), subtext,
                                 [(graph, node_address)],
                                 text_info.get_metadata(), '?'))

            # NOTE(review): the listing this was recovered from lost its
            # indentation; classification is assumed to run once per text,
            # after all of its graphs have been walked - confirm.
            classifier_info = self._dict_subgraph_classifiers_info['DEFAULT']
            subgraph_adapter = classifier_info.get_adapter()
            subgraph_classifier = classifier_info.get_classifier()
            subgraph_arff_header = classifier_info.get_arff_header()
            subgraph_dest_test_arff = classifier_info.get_arff_path()
            dict_position_instanceid = subgraph_adapter.to_arff(
                list_subgraphs_text_info, subgraph_dest_test_arff,
                subgraph_arff_header, True)
            subgraph_results_file = classifier_info.get_results_path()

            list_id_category = subgraph_classifier.classify(
                subgraph_dest_test_arff, subgraph_results_file,
                dict_position_instanceid)

            # Debug output, written so that it parses on Python 2 and 3.
            print("%s %s %s" % (textid, text_info.get_category(),
                                list_id_category))
            print([t.get_text() for t in list_subgraphs_text_info])

    def _get_subtext_range(self, dependency_graph, address):
        '''
        Collect the (address, word) pairs of the node at C{address} and of
        every node reachable from it through get_deps().

        @param dependency_graph: Object providing get_by_address(),
            get_word(), is_leaf() and get_deps().
        @param address: Address of the node where the traversal starts.
        @return: List of (address, word) tuples, starting node first,
            followed by the pairs of each child in get_deps() order.
        '''
        node = dependency_graph.get_by_address(address)
        raw_nodes = [(address, dependency_graph.get_word(node))]
        if not dependency_graph.is_leaf(node):
            for child in dependency_graph.get_deps(node):
                raw_nodes.extend(
                    self._get_subtext_range(dependency_graph, child))
        return raw_nodes

    def _count_graph(self, textid, dg, address):
        '''
        Count the features of the node at C{address} and of every node
        reachable from it through get_deps().

        @param textid: Identifier passed to C{self._id_of_feature}.
        @param dg: Object providing get_by_address(), is_leaf(), get_rel()
            and get_deps().
        @param address: Address of the node where the traversal starts.
        @return: Dictionary mapping each value returned by
            C{self._id_of_feature(textid, address, rel)} to the number of
            visited nodes that produced it.
        '''
        node = dg.get_by_address(address)
        is_leaf = dg.is_leaf(node)
        dict_features = {
            self._id_of_feature(textid, address, dg.get_rel(node)): 1}
        if is_leaf:
            return dict_features

        # Accumulate child counts in place instead of rebuilding the whole
        # dictionary from a set union for every child.
        for child in dg.get_deps(node):
            dict_child = self._count_graph(textid, dg, child)
            for key, count in dict_child.items():
                dict_features[key] = dict_features.get(key, 0) + count
        return dict_features
113