'''
@author: David Vilares Calvo
'''
from AbstractTagger import AbstractTagger
from nltk.tag.sequential import DefaultTagger
from nltk.tag.sequential import AffixTagger
from nltk.tag import brill
from InfoTag import *

class BrillTagger(AbstractTagger):
    """
    Wrapper of the NLTK Brill tagger. It builds a Brill-based tagger.

    The constructor chains the given initial tagger classes into a back-off
    tagger, trains a Brill tagger on top of it and stores the accuracy
    measured on the test set.
    """
    def __init__(self,training_set,test_set, list_initial_taggers, 
                 back_off_tagger,fine_tag=True):
        """
        Constructor
        @param training_set: A list of [(word,tag)]
        @param test_set: A list of [(word,tag)], used to evaluate the trained tagger
        @param list_initial_taggers: A list of taggers of type L{nltk.tag.sequential}.
        Each one is called as cls(training_set, backoff=previous_tagger)
        @param back_off_tagger: A back off tagger of initial tagger. None for not to include any back-off tagger
        @param fine_tag: A boolean. True for using Fine PoS-tags, False for Coarse PoS-tags.
        It is accepted for interface compatibility but not read by this constructor
        """
        initial_tagger = self._backoff_tagger(training_set, list_initial_taggers,
                                              backoff=back_off_tagger)
        self.__tagger = self._train_brill_tagger(initial_tagger, training_set)
        self.__accuracy = self.__tagger.evaluate(test_set)

    def get_accuracy(self):
        """
        @return: The accuracy of the trained tagger, as measured on the
        test set given to the constructor
        """
        return self.__accuracy


    def _backoff_tagger(self,train_sets, tagger_classes, backoff=None):
        """
        Builds a chain of back-off taggers.
        @param train_sets: The training set passed to every tagger class
        @param tagger_classes: An iterable of tagger classes. Each one is
        instantiated with the previously built tagger as its back-off
        @param backoff: The tagger at the end of the chain, or None
        @return: The last tagger built, or backoff itself if tagger_classes is empty
        """
        for cls in tagger_classes:
            # Every new tagger falls back on the one created just before it.
            backoff = cls(train_sets, backoff=backoff)
        return backoff


    def _train_brill_tagger(self,initial_tagger, train_sents, **kwargs):
        """
        Trains a Brill tagger on top of an initial tagger.
        @param initial_tagger: The tagger whose output the Brill rules correct
        @param train_sents: The training set
        @param kwargs: Extra keyword arguments forwarded to the trainer's train method
        @return: The tagger returned by the trainer
        """
        sym_bounds=[(1,1),(2,2),(1,2),(1,3)]
        asym_bounds=[(-1,1),(1,1)]
        # Rule templates over neighbouring tags and neighbouring words.
        templates=[
               brill.SymmetricProximateTokensTemplate(brill.ProximateTagsRule,*sym_bounds),
               brill.SymmetricProximateTokensTemplate(brill.ProximateWordsRule,*sym_bounds),
               brill.ProximateTokensTemplate(brill.ProximateTagsRule,*asym_bounds),
               brill.ProximateTokensTemplate(brill.ProximateWordsRule,*asym_bounds)   
               ]
        trainer = brill.FastBrillTaggerTrainer(initial_tagger,templates,deterministic=True)
        return trainer.train(train_sents, **kwargs)


    def _fine_tag_to_cposfeats(self,postag):
        """
        @param postag: A fine part-of-speech tag according with the regexp cpostag:nameofFeat_valueofFeat-nameofFeat-...
        A tag without the ":" separator is treated as a tag without feats
        @return An L{InfoTag}
        """
        # There are no feats: single-character tags (kept as in the original
        # check) and any tag lacking the ":" separator. The latter used to
        # raise an IndexError when indexing the result of split(":").
        if len(postag) == 1 or ":" not in postag:
            return InfoTag(postag,postag,"_")

        tag_parts = postag.split(":")
        cpostag = tag_parts[0]
        feat_pairs = []
        for feat in tag_parts[1].split("-"):
            # Each feature is encoded as name_value; only the first two
            # "_"-separated pieces are used.
            pieces = feat.split("_")
            feat_pairs.append(pieces[0]+"="+pieces[1])
        return InfoTag(cpostag,postag,"|".join(feat_pairs))


    def tag(self, tokens):
        """
        @param tokens: A list of tokens. Each token is a string.
        @return: A list of tuples (word,L{InfoTag})
        """
        return self._tag(tokens)


    def _tag(self, tokens):
        """
        @param tokens: A list of tokens
        @return: A list of tuples (word,L{InfoTag})
        """
        # The wrapped tagger yields (word, fine tag) pairs; each fine tag is
        # converted into an InfoTag.
        return [(word, self._fine_tag_to_cposfeats(postag))
                for word, postag in self.__tagger.tag(tokens)]
