Package miopia :: Package preprocessor :: Module TwitterUserNameProcessor
[hide private]
[frames | no frames]

Source Code for Module miopia.preprocessor.TwitterUserNameProcessor

 1  #-*- coding: utf-8 -*- 
 2   
 3  ''' 
 4  @author: David Vilares Calvo 
 5  ''' 
 6  import re 
 7  from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator 
 8   
 9   
class TwitterUserNameProcessor(PreProcessorDecorator):
    """
    Preprocessor decorator that normalises Twitter user mentions.

    Retweet markers (C{RT @username}) are deleted from the text and every
    remaining C{@x...} mention loses its C{@} and has its first character
    upper-cased. The transformed text is then passed on to the wrapped
    component's C{preprocess} method.
    """

    def __init__(self, component):
        """
        Constructor

        @param component: An instance defined by the interface L{PreProcessorI}
        """
        self._component = component
        # Raw strings keep the regex escape \w away from Python's own
        # string-escape handling (a non-raw '\w' is an invalid escape).
        self._rt_and_user_pattern = re.compile(r'RT[ ]*@{1}[\w:]{1,}')
        # Only '@' plus the first character of the name is matched: that is
        # all that is needed to drop the '@' and capitalise the name.
        self._user_pattern = re.compile(r'@\w')

    def preprocess(self, text):
        """
        Preprocess twitter usernames

        @param text: A tweet. It is a String
        @return: The result of the wrapped component's C{preprocess} applied
                 to the text after the user-name transformation.
        """
        ptext = self._twitter_names_transformation(text)
        return self._component.preprocess(ptext)

    def _twitter_names_transformation(self, text):
        """
        Remove retweet markers and rewrite user mentions.

        @param text: A tweet. It is a String
        @return: The text without C{RT @username} occurrences and with each
                 remaining C{@username} replaced by C{Username}.
        """
        # Deleting "RT @username" occurrences. Substituting at the matched
        # positions (instead of str.replace on each matched string) avoids
        # truncating a longer marker that merely starts with a shorter one,
        # e.g. "RT @ab ... RT @abc" must not leave a stray "c" behind.
        text = self._rt_and_user_pattern.sub('', text)

        # TODO: Delete consecutively user names

        # Changing "@username" occurrences to "Username": each match is
        # '@' + one character, replaced by that character upper-cased.
        text = self._user_pattern.sub(
            lambda match: match.group(0)[1].upper(), text)
        return text
55