'''
Created on 30/01/2013

@author: David Vilares Calvo
'''
import re
from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator

class URLPreProcessor(PreProcessorDecorator):
    '''
    Preprocessing decorator that masks URLs with a symbolic token.

    Every ``http://`` / ``https://`` URL matched in the text is replaced by
    SYMBOLIC_URL, and the resulting text is passed on to the wrapped
    component's own ``preprocess``.
    '''

    # Placeholder written in place of every URL that is matched.
    SYMBOLIC_URL = "SymbolicURL"

    def __init__(self, component):
        '''
        @param component: Wrapped preprocessor. It must expose a
            ``preprocess(text)`` method; it receives the text after the
            URLs have been masked.
        '''
        self._component = component
        # NOTE(review): after the scheme only word characters, '.' and '/'
        # are accepted, so a match stops at the first '-', '?', '=', '&',
        # '%' or '#', and a sentence-final '.' is swallowed into the URL.
        # Left unchanged because downstream consumers may depend on the
        # current tokenisation -- confirm before widening the pattern.
        self._url_pattern = re.compile(r'http[s]?://[\w./]{1,}')

    def preprocess(self, text):
        '''
        Mask the URLs in the text, then delegate to the wrapped component.

        @param text: String to preprocess.
        @return: Whatever the wrapped component's ``preprocess`` returns
            for the URL-masked text.
        '''
        ptext = self._url_symbolic_replace(text)
        return self._component.preprocess(ptext)

    def _url_symbolic_replace(self, text):
        '''
        Replace every URL matched in the text by SYMBOLIC_URL.

        The substitution itself is done by ``self._build_new_text``, which
        is not defined in this class (presumably inherited from
        PreProcessorDecorator -- confirm its exact replacement semantics).

        @param text: String that may contain URLs.
        @return: The text after one ``_build_new_text`` call per distinct
            URL; the input text unchanged when no URL is matched.
        '''
        # Each distinct URL is handled once. Longest URLs go first: when one
        # match is a prefix of another ("http://a.com" vs
        # "http://a.com/page"), rewriting the short one first would alter the
        # long one so it could no longer be found. Iterating a bare set left
        # that order to string hashing; the secondary lexicographic key makes
        # the order fully deterministic.
        urls = sorted(set(self._url_pattern.findall(text)),
                      key=lambda url: (-len(url), url))
        for url in urls:
            text = self._build_new_text(text, url, self.SYMBOLIC_URL)
        return text

    
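# Earlier variant of _url_symbolic_replace, kept commented out for reference.
# It substituted each matched URL with ". SymbolicURL ." through str.replace.
#    def _url_symbolic_replace(self,text):
#        #urls = re.findall(r'http://[\w./]{1,}',text)
#        urls = self._url_pattern.findall(text)
#        for url in urls:
#            text = text.replace(url,". SymbolicURL .")
#
#        return text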