'''
Created on 06/02/2013

@author: David Vilares Calvo
'''

from miopia.preprocessor.PreProcessorDecorator import PreProcessorDecorator
import nltk

class StopwordPreProcessor(PreProcessorDecorator):
    '''
    Preprocessor decorator that removes stopwords from a text and then
    hands the filtered text over to the wrapped component.

    The text is tokenised by splitting on single space characters. A token
    is dropped only when it is exactly equal (case-sensitive, no punctuation
    stripping) to an entry of the stopword list loaded in the constructor.
    '''


    def __init__(self,component,lang='spanish'):
        '''
        @param component: Wrapped preprocessor. Its preprocess(text) method
                          is called with the stopword-free text.
        @param lang: Language name passed to nltk.corpus.stopwords.words
                     to select the stopword list (default: 'spanish').
        '''
        self._component = component
        # Stored as a set so that preprocess() can test membership in O(1).
        self._stopwords = set(nltk.corpus.stopwords.words(lang))

    def preprocess(self,text):
        '''
        Remove every stopword token from text and delegate the result to
        the wrapped component.

        @param text: String to filter. It is split on ' ', so consecutive
                     spaces yield empty tokens, which are kept and therefore
                     preserved when the tokens are joined back.
        @return: Whatever the wrapped component's preprocess returns for
                 the filtered text.
        '''
        # Single pass over the tokens with a set lookup, instead of
        # rebuilding the whole token list once per stopword.
        kept_tokens = [token for token in text.split(' ')
                       if token not in self._stopwords]
        return self._component.preprocess(' '.join(kept_tokens))

    def _remove_all(self,list,val):
        '''
        Return a new list with every element equal to val removed.

        No longer used by preprocess(); kept with its original signature
        (including the 'list' parameter name, which shadows the builtin
        inside this method) so existing callers keep working.

        @param list: Iterable of elements to filter.
        @param val: Value whose occurrences are removed.
        @return: New list containing the elements that differ from val,
                 in their original order.
        '''
        return [element for element in list if element != val]
        
        