Home | Trees | Indices | Help |
---|
|
#-*- coding: utf-8 -*-

'''
@author: David Vilares Calvo
'''
import re
from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator


# NOTE(review): the ``class`` statement and the three ``def`` header lines
# were lost when this listing was extracted. The method names are recovered
# from the visible call sites; the class name is a placeholder -- confirm it
# against the real module before relying on it.
class TwitterNamesPreProcessor(PreProcessorDecorator):
    '''
    Preprocessor decorator that normalises Twitter user mentions in a tweet
    and then hands the result to the wrapped component.
    '''

    def __init__(self, component):
        '''
        Constructor
        @param component: An instance defined by the interface L{PreProcessorI}
        '''
        self._component = component
        # Raw strings: '\w' in a plain literal is an invalid escape sequence
        # and triggers a warning on current Python versions.
        # Matches retweet markers such as "RT @user:" or "RT@user".
        self._rt_and_user_pattern = re.compile(r'RT[ ]*@{1}[\w:]{1,}')
        # Matches '@' plus the single word character that follows it.
        self._user_pattern = re.compile(r'@\w')

    def preprocess(self, text):
        """
        Preprocess twitter usernames and delegate to the wrapped component.
        @param text: A tweet. It is a String
        @return: Whatever the wrapped component's C{preprocess} returns for
        the transformed tweet
        """
        ptext = self._twitter_names_transformation(text)
        return self._component.preprocess(ptext)

    def _twitter_names_transformation(self, text):
        """
        Remove "RT @username" markers and rewrite the remaining "@username"
        mentions as "Username" (the '@' is dropped and the first character
        of the name is upper-cased).
        @param text: A tweet. It is a String
        @return: The transformed tweet. It is a String
        """
        # Deleting "RT @username" occurrences in a single pass.
        text = self._rt_and_user_pattern.sub('', text)

        #TODO: Delete consecutive user names

        # Changing "@username" occurrences to "Username". A single sub()
        # pass avoids rescanning the text once per mention and never
        # re-processes text produced by an earlier replacement.
        return self._user_pattern.sub(
            lambda match: match.group(0)[1].upper(), text)
Home | Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Wed Oct 15 10:03:40 2014 | http://epydoc.sourceforge.net |