'''
Created on 11/04/2013

@author: David Vilares
'''
from miopia.analyzer.SemanticCategory import SemanticCategory


class Analyzer(object):
    """
    Analyzer is an abstract class which defines some methods for the
    L{SentimentAnalyzer}.

    The C{analyze*} entry points raise C{NotImplementedError} and must be
    overridden. Some helpers call C{self.get_word}, which is not defined in
    this class; it is presumably provided by subclasses (to be confirmed).
    """

    # Symbols treated as reserved/invalid when they appear inside a word
    # (see L{_contain_invalid_token} and L{_word_contain_invalid_token}).
    _WEKA_INVALID_SYMBOLS = ("'", "|", "\\")

    def __init__(self, parser, dictionaries, preprocessor,
                 lexical_processor):
        """
        @param parser: An instance of L{Parser}
        @param dictionaries: An instance of L{Dictionary}
        @param preprocessor: An instance of L{PreProcessor}.
        @param lexical_processor: An instance of L{LexicalProcessor}.
        """
        self._preprocessor = preprocessor
        self._lexical_processor = lexical_processor
        self._parser = parser
        self._dictionaries = dictionaries

    def analyze_from_conll(self, file_path, **kwargs):
        """
        @param file_path: Path to the parsed file in the CoNLL format to be
        analysed
        @raise NotImplementedError: Always; subclasses must override it.
        """
        raise NotImplementedError

    def analyze_from_plain_file(self, file_path, input_encoding='utf-8'):
        """
        @param file_path: Path to the plain file to be analysed
        @param input_encoding: The encoding of the input file
        @raise NotImplementedError: Always; subclasses must override it.
        """
        raise NotImplementedError

    def analyze_dir(self, dir_path, input_encoding='utf-8'):
        """
        It analyzes a directory of plain texts
        @param dir_path: Path to the directory of plain files to be analysed
        @param input_encoding: The encoding of the input files
        @raise NotImplementedError: Always; subclasses must override it.
        """
        raise NotImplementedError

    def analyze(self, text):
        """
        @param text: The string to be analysed. Use unicode.
        @raise NotImplementedError: Always; subclasses must override it.
        """
        raise NotImplementedError

    def _preanalyze(self, text):
        """
        It applies pre-processing, segmentation, tokenization and
        PoS-tagging steps to the string to be analysed.
        @param text: The string to be analysed
        @return: A tuple. First element is a list of tagged sentences.
        Each tagged sentence is a list of tuples (token,L{InfoTag}). The
        second element is the extra value returned by
        C{extract_tokens} alongside the tokens, passed through unchanged.
        """
        preprocessed_text = self._preprocessor.preprocess(text)
        sentences = self._lexical_processor.extract_sentences(
            preprocessed_text)
        (tokens, lsi) = self._lexical_processor.extract_tokens(sentences)
        return self._lexical_processor.extract_tags(tokens), lsi

    def _is_weka_reserved_element(self, node):
        """
        It determines if a node is a WEKA reserved symbol

        @param node: A node of a L{SentimentDependencyGraph}
        @return: True if node word is a WEKA reserved element, False otherwise
        """
        return self.get_word(node) in ['\'']

    def _contain_invalid_token(self, node):
        """
        @param node: A node of a L{SentimentDependencyGraph}
        @return: True if node word contains a reserved WEKA symbol, False
        otherwise. False is also returned when the word cannot be
        obtained or inspected.
        """
        try:
            # Fetch the word once instead of once per symbol.
            word = self.get_word(node)
            return any(symbol in word
                       for symbol in self._WEKA_INVALID_SYMBOLS)
        except Exception:
            # Best-effort check: an unreadable word is reported as valid.
            return False

    def _word_contain_invalid_token(self, str_word):
        """
        @param str_word: A string. A word.
        @return: True if the word contains a reserved WEKA symbol, False
        otherwise. False is also returned when C{str_word} does not support
        the membership test (e.g. it is None).
        """
        try:
            return any(symbol in str_word
                       for symbol in self._WEKA_INVALID_SYMBOLS)
        except Exception:
            # Best-effort check: a non-inspectable value is reported as valid.
            return False

    def negation_node(self, dg, node):
        """
        @param dg: An instance of a L{SentimentDependencyGraph}
        @param node: A node of a L{SentimentDependencyGraph}
        @return: L{SemanticCategory.NEGATION_WITHOUT} if the node word is
        'sin'; L{SemanticCategory.NEGATION} if the node has a child 'no'
        whose relation is 'mod' or 'neg', or a child 'nunca' whose relation
        is not 'S'; None otherwise.
        """
        # Note: the 'sin' comparison is case-sensitive, unlike the children.
        if dg.get_word(node) == 'sin':
            return SemanticCategory.NEGATION_WITHOUT

        # Explicit loop: same result on Python 2 (eager map) and 3 (lazy map).
        for address in dg.get_deps(node):
            child = dg.get_by_address(address)
            word = dg.get_word(child).lower()
            rel = dg.get_rel(child)
            if ((word == 'no' and rel in ['mod', 'neg'])
                    or (word == 'nunca' and rel != 'S')):
                return SemanticCategory.NEGATION
        return None

    def get_semantic_category(self, dg, node):
        """
        Provides information to call the correct visit function
        @param dg: An instance of a L{SentimentDependencyGraph}
        @param node: A node of a L{SentimentDependencyGraph}
        @return: The "semantic" category of the node, checked in this order:
        the negation type if L{negation_node} finds one, the intensifier
        category, the emoticon category, the subordinate adversative
        category for artificial 'art_adversative' nodes, and otherwise the
        category mapped from the node lexical category ('n', 'a', 'r', 'v'),
        falling back to L{SemanticCategory.OTHER}.
        @raise KeyError: If the node is artificial and its lexical category
        is not 'art_adversative'.
        """
        # Checking if it is a negation
        type_neg = self.negation_node(dg, node)
        if type_neg is not None:
            return type_neg
        if dg.is_intensifier(node, self._dictionaries):
            return SemanticCategory.INTENSIFIER
        if dg.is_emoticon(node):
            return SemanticCategory.EMOTICON
        if dg.is_artificial_node(node):
            switch = {
                "art_adversative": SemanticCategory.SUBORDINATE_ADVERSATIVE,
            }
            return switch[dg.get_lexical_category(node)]

        switch_lexical_pos = {"n": SemanticCategory.NOUN,
                              "a": SemanticCategory.ADJECTIVE,
                              "r": SemanticCategory.ADVERB,
                              "v": SemanticCategory.VERB}
        try:
            return switch_lexical_pos[dg.get_lexical_category(node)]
        except Exception:
            # Unknown (or unobtainable) lexical categories map to OTHER.
            return SemanticCategory.OTHER
        