#-*- coding: utf-8 -*-

'''
@author: David Vilares Calvo
'''
import re
from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator
from time import time


class HashTagProcessor(PreProcessorDecorator):
    '''
    Preprocessor decorator that normalises the hashtags of a tweet and then
    hands the result to the wrapped component.

    Hashtags are the substrings matched by C{#\\w+}. Depending on where a
    hashtag sits in the tweet it is either rewritten without its '#' symbol
    or, when it is a bordering (initial/ending) hashtag, rebuilt or removed.
    '''

    # Hashtags that are always kept (upper-cased, without '#') and never
    # deleted, regardless of their position in the tweet.
    _SPECIAL_HASHTAGS = frozenset(['#FF', '#ff'])

    def __init__(self,component, remove_bordering=False):
        '''
        Constructor
        @param component: An instance defined by the interface L{PreProcessorI}
        @param remove_bordering: If True, bordering hashtags are deleted from
        the text; otherwise they are rewritten through C{_build_new_text}.
        '''
        self._component = component
        self._remove_bordering = remove_bordering
        self._hashtag_pattern = re.compile(r'(#\w{1,})')


    def preprocess(self,text):
        """
        @param text: A tweet as a string
        @return A hashtag processed tweet, after being preprocessed by the
        wrapped component as well
        """
        return self._component.preprocess(self._process_hashtag(text))


    def is_bordering_hashtag(self,text,hashtag,nocurrence):
        """
        Classifies the position of a hashtag among the whitespace separated
        tokens of the text. Tokens are compared case-insensitively and as a
        whole, so a hashtag glued to punctuation (e.g. '#tag,') is not
        recognised as an occurrence.

        @param text: A tweet as a string
        @param hashtag: The hashtag to look for, including the '#' symbol
        @param nocurrence: Number of the occurrence being processed.
        Currently unused (see TODO below).
        @return: -1 if the hashtag is found before any token that does not
        start with '#' (initial hashtag); 1 if it is found after the last
        token that does not start with '#' (ending hashtag); 0 otherwise.
        Note that -1 is also returned when the hashtag is never found and no
        token lacks the leading '#' (this includes an empty text).
        """
        #TODO: Repeated hashtags
        target = hashtag.lower()
        code = -1
        for token in text.split():
            if not token.startswith('#'):
                # A regular word cancels any 'initial'/'ending' state so far
                code = 0
            if token.lower() == target:
                if code == -1:
                    # Only hashtags have been seen until now: initial hashtag
                    return code
                # Candidate ending hashtag; reset if a regular word follows
                code = 1
        return code


    def _replace_whole_hashtag(self, text, hashtag, replacement):
        """
        Replaces every occurrence of the hashtag that is not the prefix of a
        longer hashtag (e.g. '#py' inside '#python' is left untouched).

        @param text: A tweet as a string
        @param hashtag: The hashtag to replace, including the '#' symbol
        @param replacement: The string to put in place of the hashtag
        @return The text with the occurrences replaced
        """
        # The negative lookahead uses the same '\w' class that delimits the
        # hashtags found by self._hashtag_pattern.
        pattern = re.escape(hashtag) + r'(?!\w)'
        # A callable avoids any interpretation of the replacement as a
        # regular expression template.
        return re.sub(pattern, lambda _match: replacement, text)


    def _process_hashtag(self,text):
        """
        @param text: A tweet. It is a String
        Bordering hashtags (at the beginning or the end of the text) are
        deleted if remove_bordering was set; otherwise they are rewritten
        by means of _build_new_text. For any other hashtag only the '#'
        symbol is deleted.
        Special hashtags as #FF always represented without #, never deleted.
        @return The text with its hashtags processed
        """
        #TODO: #ff (and #FF) capitalised but not must count as an intensifier
        hashtags = self._hashtag_pattern.findall(text)

        # Running count of how many times each hashtag has been visited
        ocurrences = {hashtag:0 for hashtag in hashtags}
        for hashtag in hashtags:
            ocurrences[hashtag]+=1

            if hashtag in self._SPECIAL_HASHTAGS:
                # Checked first so that a bordering special hashtag is
                # never removed
                text = self._replace_whole_hashtag(text, hashtag,
                                                   hashtag[1:].upper())
                continue

            # The check runs on the text as modified by previous iterations
            is_bordering = self.is_bordering_hashtag(text, hashtag,
                                                     ocurrences[hashtag])
            if not is_bordering:
                text = self._replace_whole_hashtag(text, hashtag, hashtag[1:])
            elif self._remove_bordering:
                text = self._replace_whole_hashtag(text, hashtag, '')
            else:
                # NOTE(review): _build_new_text is not defined in this
                # class; presumably inherited from PreProcessorDecorator
                # - confirm.
                text= self._build_new_text(text, hashtag, hashtag[1:], maxreplace=1)
        return text
    
    

#    def _process_hashtag(self,text):
#        """
#        @param text: A tweet. It is a String
#        Eliminate the complete hashtag if there is at the beginning
#        or the end of the text. Otherwise, only delete '#' symbol.
#        Special hashtags as #FF always represented without #, never deleted.
#        """
#        set_special_hashtags = (['#FF','#ff'])
#        #TODO: #ff (and #FF) capitalised but not must count as an intensifier
##        start = time()
#        #hashtags = re.findall(r'(#\w{1,})',text)
#        hashtags = self._hashtag_pattern.findall(text)
#        initial_text = text
#        for hashtag in hashtags:
#            print "proof: ",self.is_bordering_hashtag(text, hashtag),"|" , hashtag, "|" ,text
#            if ((text.startswith(hashtag) or text.endswith(hashtag))
#                 and hashtag.upper() not in set_special_hashtags):
##                if not self._remove_bordering:
##                    text = text.replace(hashtag,self._build_new_sentence(hashtag[1:]))
##                else:
##                    print "entra aqui"
##                    text = text.replace(hashtag,'')
#                pass
#            else:
#                if hashtag in set_special_hashtags:
#                    text = text.replace(hashtag,hashtag[1:].upper()) 
#                else: 
#                    text = text.replace(hashtag, hashtag[1:])
##        print initial_text
##        print text
#        return text
#        