Package miopia :: Package preprocessor :: Module StopwordPreProcessor
[hide private]
[frames] | no frames]

Source Code for Module miopia.preprocessor.StopwordPreProcessor

 1  ''' 
 2  Created on 06/02/2013 
 3   
 4  @author: David Vilares Calvo 
 5  ''' 
 6   
 7  from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator 
 8  import nltk 
 9   
class StopwordPreProcessor(PreProcessorDecorator):
    '''
    Preprocessor decorator that strips stopwords from a text before
    handing the result to the wrapped component.

    The text is tokenised by splitting on single space characters and a
    token is dropped only when it is exactly equal to one of the loaded
    stopwords (the comparison is case-sensitive and does not strip
    punctuation attached to a token).
    '''

    def __init__(self, component, lang='spanish'):
        '''
        @param component: Wrapped object; its C{preprocess(text)} method is
            called with the stopword-free text.
        @param lang: Name of the stopword list requested from
            C{nltk.corpus.stopwords.words}. Defaults to C{'spanish'}.
        '''
        # NOTE(review): the base-class constructor is not invoked here, as
        # in the original code -- confirm PreProcessorDecorator needs no
        # initialisation of its own.
        self._component = component
        # Stored as a set so membership tests in preprocess() are O(1).
        self._stopwords = set(nltk.corpus.stopwords.words(lang))

    def preprocess(self, text):
        '''
        Remove every stopword token from C{text} and delegate the result
        to the wrapped component.

        @param text: String to clean. It is split on C{' '}, so runs of
            consecutive spaces yield empty tokens, which are kept as-is.
        @return: Whatever the wrapped component's C{preprocess} returns
            for the filtered text.
        '''
        stopwords = self._stopwords
        # Single pass with set membership instead of rebuilding the token
        # list once per stopword; the surviving tokens are the same.
        kept_tokens = [token for token in text.split(' ')
                       if token not in stopwords]
        return self._component.preprocess(' '.join(kept_tokens))

    def _remove_all(self, list, val):
        '''
        Return a new list with every element equal to C{val} removed.

        No longer used by C{preprocess}; kept so that any existing caller
        or subclass relying on it continues to work. The parameter names
        (including C{list}, which shadows the builtin inside this method)
        are left unchanged for the same reason.

        @param list: Sequence of elements to filter.
        @param val: Value to remove.
        @return: New list holding the elements of C{list} different
            from C{val}, in their original order.
        '''
        return [element for element in list if element != val]
33