Package miopia :: Package preprocessor :: Module InterjectionPreProcessor
[hide private]
[frames | no frames]

Source Code for Module miopia.preprocessor.InterjectionPreProcessor

 1  ''' 
 2  Created on 06/02/2013 
 3   
 4  @author: David Vilares Calvo 
 5  ''' 
 6   
 7  from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator 
 8  from miopia.util.exceptions.LanguageNotSupportedException import LanguageNotSupportedException 
 9  import re 
10   
class InterjectionPreProcessor(PreProcessorDecorator):
    '''
    Preprocessor decorator that normalises written laughs.

    Any run of four or more characters made up only of the laugh consonant
    of the configured language and one vowel (in either case), and that
    contains the consonant at least once, is collapsed to a canonical
    four-character laugh: e.g. "jajajaja" -> "jaja", "JEJEJEJE" -> "JEJE"
    (Spanish) or "hahahah" -> "haha" (English). The normalised text is then
    handed to the wrapped component.
    '''

    # Default laugh consonant (Spanish); __init__ sets it per instance.
    laugh_consonant = 'j'

    def __init__(self, component, lang='es'):
        '''
        @param component: The wrapped object; its preprocess(text) method is
                          called with the laugh-normalised text.
        @param lang: Language code selecting the laugh consonant:
                     'es' (uses 'j') or 'en' (uses 'h').
        @raise LanguageNotSupportedException: If lang is neither 'es' nor 'en'.
        '''
        self._component = component
        if lang == 'en':
            self.laugh_consonant = 'h'
        elif lang == 'es':
            self.laugh_consonant = 'j'
        else:
            raise LanguageNotSupportedException(lang)

    def preprocess(self, text):
        """
        @param text: A String.
        @return The result of the wrapped component's preprocess applied to
                the text with normalised laughs.
        """
        return self._component.preprocess(self._normalize_laughs(text))

    def _normalize_laughs(self, text):
        """
        @param text: A String.
        @return The string where every laugh has been replaced by its
                canonical four-character form. Vowels are processed in the
                order a, e, i, o, u, each pass working on the output of the
                previous one.
        """
        cons = self.laugh_consonant
        cons_upper = cons.upper()

        def normalise_laugh(vocal, text):
            vocal_upper = vocal.upper()
            regex = '[' + cons + cons_upper + vocal + vocal_upper + ']{4,}'
            normalised_cap_laugh = (cons_upper + vocal_upper
                                    + cons_upper + vocal_upper)
            normalised_lower_laugh = cons + vocal + cons + vocal

            def replace_laugh(match):
                laugh = match.group(0)
                # A run of vowels only (e.g. "aaaa") is not really a laugh.
                if cons not in laugh and cons_upper not in laugh:
                    return laugh
                # Only fully upper-case laughs keep their capitalisation;
                # mixed-case ones are normalised to lower case.
                if laugh.isupper():
                    return normalised_cap_laugh
                return normalised_lower_laugh

            # Substitute each match in place. A global str.replace on the
            # matched substring would also rewrite the prefix of any longer
            # laugh elsewhere in the text and leave it only partly normalised.
            return re.sub(regex, replace_laugh, text)

        for vocal_laugh in ('a', 'e', 'i', 'o', 'u'):
            text = normalise_laugh(vocal_laugh, text)

        return text
60