Package miopia :: Package preprocessor :: Module URLPreProcessor
[hide private]
[frames] | no frames]

Source Code for Module miopia.preprocessor.URLPreProcessor

 1  ''' 
 2  Created on 30/01/2013 
 3   
 4  @author: David Vilares Calvo 
 5  ''' 
 6  import re 
 7  from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator 
 8   
class URLPreProcessor(PreProcessorDecorator):
    '''
    Preprocessor decorator that swaps URLs for a symbolic token.

    Every substring matched by the URL pattern is handed, together with
    C{SYMBOLIC_URL}, to C{_build_new_text}; the rewritten text is then
    forwarded to the wrapped component's C{preprocess} method.
    '''

    # Token passed to _build_new_text as the replacement for each URL.
    SYMBOLIC_URL = "SymbolicURL"

    def __init__(self, component):
        '''
        Constructor

        @param component: Wrapped object whose C{preprocess(text)} method is
            called with the URL-normalised text.
        '''
        self._component = component
        # NOTE(review): the character class only admits word characters,
        # '.' and '/', so a URL containing e.g. '-', '?', '=' or '%' is
        # matched only up to the first such character. Left unchanged here
        # because widening it alters what downstream stages receive.
        self._url_pattern = re.compile(r'http[s]?://[\w./]{1,}')

    def preprocess(self, text):
        '''
        Replace the URLs in C{text} and delegate to the wrapped component.

        @param text: Text to preprocess.
        @return: Whatever the wrapped component's C{preprocess} returns for
            the URL-normalised text.
        '''
        ptext = self._url_symbolic_replace(text)
        return self._component.preprocess(ptext)

    def _url_symbolic_replace(self, text):
        '''
        Substitute every distinct URL found in C{text}.

        @param text: Text to scan with the compiled URL pattern.
        @return: C{text} after one C{_build_new_text} call per distinct URL.
        '''
        # _build_new_text is not defined in this class; presumably inherited
        # from PreProcessorDecorator -- TODO confirm.
        urls = set(self._url_pattern.findall(text))
        # Longest first (ties broken alphabetically): iterating the bare set
        # gave an arbitrary order, so when one match is a prefix of another
        # ("http://a.com" vs "http://a.com/x") the shorter one could be
        # substituted first and corrupt the longer one.
        for url in sorted(urls, key=lambda u: (-len(u), u)):
            text = self._build_new_text(text, url, self.SYMBOLIC_URL)
        return text
34 35 36 # def _url_symbolic_replace(self,text): 37 # #urls = re.findall(r'http://[\w./]{1,}',text) 38 # urls = self._url_pattern.findall(text) 39 # for url in urls: 40 # text = text.replace(url,". SymbolicURL .") 41 # 42 # return text 43