#-*- coding: utf-8 -*-
'''
@author: David Vilares Calvo
'''
import codecs

class TextPreparator(object):
    """
    Tools for preparing a text for MaltParser in CoNLL 2006 format.

    The output is "unparsed": only the first six tab-separated columns of
    each token line are written (identifier, word, a '_' placeholder,
    coarse-grained PoS tag, fine-grained PoS tag and features).
    """

    def __init__(self):
        """
        Constructor. The preparator keeps no state between calls.
        """

    def prepare(self, output_path, tagged_sentences):
        """
        Writes the tagged sentences to a UTF-8 encoded file, one token per
        line, each sentence being followed by an empty line.

        @param output_path: A path to the output conll file unparsed. The
        file is created, or truncated if it already exists.
        @param tagged_sentences:  A list of lists of tuples (word,L{TokenDependencyInfo}). Each
        list of tuples is a tagged sentence.
        """
        # The context manager guarantees that the file is flushed and closed,
        # even if a malformed token raises while the sentences are written.
        with codecs.open(output_path, 'w', encoding='utf-8') as output_file:
            for tagged_sentence in tagged_sentences:
                output_file.writelines(self._toConll2006(tagged_sentence))
                # An empty line separates consecutive sentences.
                output_file.write('\n')

    def _toConll2006(self, taggedTokens):
        """
        @param taggedTokens: A list of tuples (word,L{TokenDependencyInfo}) of a sentence
        @return: A String list in CoNLL 2006 format: one newline-terminated
        line per token, with identifiers starting at 1 for each sentence.
        """
        conll_lines = []
        for identifier, token in enumerate(taggedTokens, 1):
            # Index-based access keeps accepting tuples longer than two items.
            word, info = token[0], token[1]
            columns = [
                str(identifier),
                word,
                '_',  # third column is always left unspecified
                info.get_cpostag(),
                info.get_postag(),
                info.get_feats(),
            ]
            conll_lines.append('\t'.join(columns) + '\n')
        return conll_lines
        
    