'''
Created on 13/03/2014

@author: David Vilares
'''

from miopia.analyzer.counter.RawCounter import RawCounter
from miopia.util.TokenInfo import TokenInfo

class LexiconCounter(RawCounter):

    '''
    Abstract class to manage counters which use external knowledge (a nested
    lexicon dictionary) to obtain features for the supervised classifier.

    The matching code calls C{self._get_values(d, textid, position, value)},
    which is not defined in this class: it has to be provided by a subclass
    (or by L{RawCounter}).
    '''

    def __init__(self, ftc, preprocessor, lexical_processor,
                 dict_lexicon, lowercase=True):

        '''
        @param ftc: An instance of L{FeatureTypeConfiguration}
        @param preprocessor: An instance of L{PreprocessorI}
        @param lexical_processor: An instance of L{LexicalProcessor}
        @param dict_lexicon: A nested dictionary {key:[Category,{key:[ ... ]}]}
        @param lowercase: A boolean. True to ignore capitalised characters.
        '''
        super(LexiconCounter, self).__init__(ftc, lowercase)
        self._preprocessor = preprocessor
        self._lexical_processor = lexical_processor
        self._dict_lexicon = dict_lexicon

    @staticmethod
    def _add_lexicon_features(target, source):
        '''
        Adds every value of C{source} to the value stored under the same key
        in C{target}, creating the key when it is not there yet.

        @param target: The dictionary that is updated in place
        @param source: The dictionary whose values are added
        '''
        for key, amount in source.items():
            if key in target:
                target[key] += amount
            else:
                target[key] = amount

    def _count(self, list_text_info):
        '''
        Collects the lexicon features of a collection of texts.

        @param list_text_info: A list of L{TextInfo} objects
        @return: A dictionary with the values returned by L{_find_values}
        for each text, added up key by key over all the texts.
        '''
        dict_features = {}

        for textid, list_sentences_word_tag in self.raw_processing(list_text_info):
            # Flatten the sentences of the text into a single token sequence.
            tokens = []
            for sentence_word_tag in list_sentences_word_tag:
                for word, infotag in sentence_word_tag:
                    tokens.append(TokenInfo(self._filter(word), None,
                                            infotag.get_cpostag(),
                                            infotag.get_postag()))

            # A text without tokens cannot match anything in the lexicon.
            if not tokens:
                continue

            # Positions are 1-based: the first token is at position 1.
            abstractions = self._find_values(textid, tokens[0], tokens[1:],
                                             1, 1, self._dict_lexicon)
            self._add_lexicon_features(dict_features, abstractions)

        return dict_features

    def _is_terminal_value(self, dict_expressions, list_token_info):
        '''
        Tells whether the expression matched so far cannot be extended with
        the next token, i.e. whether it is the longest match.

        @param dict_expressions: A dictionary with the possible continuations
        of the expression matched so far
        @param list_token_info: The list of L{TokenInfo} that follow the
        current token
        @return: True if there are no continuations, if there are no tokens
        left, or if the form of the next token is not a continuation.
        '''
        if not dict_expressions or not list_token_info:
            return True
        return list_token_info[0].get_form() not in dict_expressions

    def _find_values(self, textid, token_info, sublist_token_info,
                     initial_position, current_position, subdictionary):
        '''
        Walks the tokens of a text from left to right looking for the longest
        expressions contained in the lexicon and collects their values by
        means of C{self._get_values}.

        The walk is iterative, so the length of the text is not bounded by
        the recursion limit of the interpreter.

        @param textid: The identifier of the text, forwarded to
        C{self._get_values}
        @param token_info: The first L{TokenInfo} to be processed
        @param sublist_token_info: The list of L{TokenInfo} that follow
        C{token_info}
        @param initial_position: Position of the token where the expression
        currently being matched started (L{_count} starts counting at 1)
        @param current_position: Position of C{token_info}
        @param subdictionary: The dictionary where C{token_info} is looked
        up: the whole lexicon, or the continuations of a partial match
        @return: A dictionary with the values collected for the tokens
        '''
        root = self._dict_lexicon
        tokens = [token_info]
        tokens.extend(sublist_token_info)

        found = {}
        index = 0
        while index < len(tokens):
            try:
                entries = subdictionary[tokens[index].get_form()]

                # An entry is either a sub-dictionary with the continuations
                # of the expression or a value attached to the expression.
                continuations = {}
                payloads = []
                for entry in entries:
                    if isinstance(entry, dict):
                        continuations.update(entry)
                    else:
                        payloads.append(entry)

                matched = {}
                # Values are only emitted for the longest match.
                if self._is_terminal_value(continuations, tokens[index + 1:]):
                    for payload in payloads:
                        matched = self._get_values(matched, textid,
                                                   initial_position, payload)
            except KeyError:
                # NOTE(review): a KeyError raised inside _get_values also
                # lands here and is treated as a miss -- confirm that this
                # is intended.
                if subdictionary is not root:
                    # The token does not continue the current expression:
                    # look it up again as the start of a new expression.
                    subdictionary = root
                    initial_position = current_position
                else:
                    # The token is not in the lexicon: move on to the next
                    # token to look for its abstractions.
                    index += 1
                    current_position += 1
                    initial_position = current_position
                continue

            self._add_lexicon_features(found, matched)

            # Try to extend the expression with the next token.
            subdictionary = continuations
            index += 1
            current_position += 1

        return found


        