1 '''
2 Created on 11/04/2013
3
4 @author: David Vilares
5 '''
6 from miopia.analyzer.SemanticCategory import SemanticCategory
7
8
10 '''
11 Analyzer is an abstract class which defines some methods for the
12 L{SentimentAnalyzer}.
13 '''
14
15
16 - def __init__(self, parser, dictionaries,preprocessor,
17 lexical_processor):
18 '''Keep references to the given collaborators on the instance.
19 @param parser: An instance of L{Parser}.
20 @param dictionaries: An instance of L{Dictionary}.
21 @param preprocessor: An instance of L{PreProcessor}.
22 @param lexical_processor: An instance of L{LexicalProcessor}.
23 '''
24 self._preprocessor = preprocessor
25 self._lexical_processor = lexical_processor
26 self._parser = parser
27 self._dictionaries = dictionaries
28
30 """
31 @param file_path: Path to the parsed file in the CoNLL format to be analysed
32 """
33 raise NotImplementedError
34
36 """
37 @param file_path: Path to the plain file to be analysed
38 @param input_encoding: The encoding of the input file
39 """
40 raise NotImplementedError
41
43 """
44 It analyzes a directory of plain texts
45 @param dir_path: Path to the directory of plain files to be analysed
46 @param input_encoding: The encoding of the input file
47 """
48 raise NotImplementedError
49
51 """
52 @param text: The string to be analysed. Use unicode.
53 """
54 raise NotImplementedError
55
57 """
58 It applies pre-processing, segmentation, tokenization and
59 PoS-tagging steps to the string to be analysed.
60 @param text: The string to be analysed
61 @return: A tuple. First element is a list of tagged sentences.
62 Each tagged sentence is a list of tuples (token,L{InfoTag})
63 """
64 preprocessed_text = self._preprocessor.preprocess(text)
65 sentences =self._lexical_processor.extract_sentences(preprocessed_text)
66 (tokens,lsi) = self._lexical_processor.extract_tokens(sentences)
67 return self._lexical_processor.extract_tags(tokens),lsi
68
69
71 """
72 It determines if a node is a WEKA reserved symbol
73
74 @param node: A node of a L{SentimentDependencyGraph}
75 @return: True if node word is a WEKA reserved element, False otherwise
76 """
77 return self.get_word(node) in ['\'']
78
79
81 """
82 @param node: A node of a L{SentimentDependencyGraph}
83 @return: True if node word contains a reserved WEKA symbol, False otherwise
84 """
85
86 try:
87 return ('\'' in self.get_word(node) or '|' in self.get_word(node)
88 or '\\' in self.get_word(node))
89 except:
90 False
91
92
94 """
95 @param str_word: A string. A word.
96 @return: True if str_word contains a reserved WEKA symbol, False otherwise
97 """
98 try:
99 return ('\'' in str_word or '|' in str_word
100 or '\\' in str_word)
101 except:
102 False
103
104
106 """
107 @param node: A node of a L{SentimentDependencyGraph}
108 @param dg: An instance of a L{SentimentDependencyGraph}
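109 @return: L{SemanticCategory.NEGATION_WITHOUT} if the node word is 'sin'; L{SemanticCategory.NEGATION} if a child of the node
110 is 'no' (with relation 'mod' or 'neg') or 'nunca' (with any relation other than 'S'); None otherwise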
111 """
112
113 if dg.get_word(node) == 'sin':
114 return SemanticCategory.NEGATION_WITHOUT
115
116 children_nodes = map(dg.get_by_address,dg.get_deps(node))
117 for child in children_nodes:
118 word = dg.get_word(child).lower()
119 rel = dg.get_rel(child)
120 if (word == 'no' and rel in ['mod','neg']) or (word == 'nunca' and rel != 'S') :
121 return SemanticCategory.NEGATION
122 return None
123
124
156