'''
Created on 19/05/2014

@author: david.vilares
'''

import re
from miopia.analyzer.counter.RawCounter import RawCounter

class UnGrammaticalCounter(RawCounter):
    '''
    This counter counts some ungrammatical phenomena that may be present in
    a text: fully upper-cased words and hashtags. A replications feature is
    also emitted for every text, but it is currently always 0 (see L{_count}).
    '''

    REPLICATIONS = "REPLICATIONS"
    HASHTAGS = "HASHTAGS"
    # NOTE: the attribute name is misspelled ("CAPILATISED"); it is kept
    # as-is because code outside this class may reference it.
    WORDS_CAPILATISED = "WORDS_CAPITALISED"

    def __init__(self, ftc):
        '''
        @param ftc: An instance of L{FeatureTypeConfiguration}
        '''
        super(UnGrammaticalCounter, self).__init__(ftc)
        # Matches a word containing the same word character twice in a row.
        # It is compiled here but not applied anywhere in this class.
        self._repeat_regexp = re.compile(r'(\w*)(\w)\2(\w*)')
        # A token must contain at least one of these characters to be
        # considered a word (note that the underscore is included).
        self._word_regexp = re.compile(r'[a-zA-Z_]')
        self._phenomena = set([self.REPLICATIONS, self.HASHTAGS,
                               self.WORDS_CAPILATISED])

    def raw_processing(self, list_text_info):
        '''
        Not supported by this counter.

        @param list_text_info: A list of L{TextInfo} objects
        @raise NotImplementedError: always
        '''
        raise NotImplementedError

    def _count(self, list_text_info):
        """
        Counts the phenomena of each text, token by token. Tokens are
        obtained by splitting the text on whitespace.

          - WORDS_CAPITALISED: tokens longer than one character that are
            equal to their upper-cased form and contain at least one
            character matched by C{[a-zA-Z_]}.
          - HASHTAGS: tokens starting with C{'#'}.
          - REPLICATIONS: never incremented here, so it is always reported
            as 0.

        @param list_text_info: A list of L{TextInfo} objects
        @return: A dictionary mapping the feature identifier built by
            C{self._id_of_feature(textid, -1, phenomenon)} to the number of
            occurrences of that phenomenon. One entry per phenomenon is
            created for every text, even when the count is 0; counts of
            texts that yield the same identifier are added up.
        """
        dict_features = {}
        for text_info in list_text_info:
            textid = text_info.get_textid()
            text_tokens = text_info.get_text().split()

            # Every phenomenon starts at 0 so that all of them are reported.
            dict_phenomena = {phenomenon: 0 for phenomenon in self._phenomena}
            for token in text_tokens:
                # THERE IS A CAPITALISED WORD
                # The regexp check discards tokens made only of digits or
                # punctuation, which are trivially equal to their upper form.
                if (token.upper() == token
                        and self._word_regexp.search(token) is not None
                        and len(token) > 1):
                    dict_phenomena[self.WORDS_CAPILATISED] += 1
                # THERE IS A HASHTAG
                if token.startswith('#'):
                    dict_phenomena[self.HASHTAGS] += 1

            for phenomenon in self._phenomena:
                feature_id = self._id_of_feature(textid, -1, phenomenon)
                # Accumulate in case the identifier was already seen.
                dict_features[feature_id] = (dict_features.get(feature_id, 0)
                                             + dict_phenomena[phenomenon])
        return dict_features
    