Package miopia :: Package preparator :: Module TextPreparator
[hide private]
[frames | no frames]

Source Code for Module miopia.preparator.TextPreparator

 1  #-*- coding: utf-8 -*- 
 2  ''' 
 3  @author: David Vilares Calvo 
 4  ''' 
 5  import codecs 
 6   
class TextPreparator(object):
    """
    Tools for preparing a text to MaltParser in CoNLL 2006 format
    """

    def __init__(self):
        """
        Constructor
        """

    def prepare(self, output_path, tagged_sentences):
        """
        Write the tagged sentences to an unparsed CoNLL file.

        Every sentence is written as one line per token, followed by an
        empty line that separates it from the next sentence.

        @param output_path: A path to the output conll file unparsed
        @param tagged_sentences: A list of lists of tuples
        (word,L{TokenDependencyInfo}). Each list of tuples is a tagged
        sentence.
        """
        # The context manager closes (and therefore flushes) the file even
        # when serialising a token raises part-way through the loop.
        with codecs.open(output_path, 'w', encoding='utf-8') as output_file:
            for tagged_sentence in tagged_sentences:
                output_file.writelines(self._toConll2006(tagged_sentence))
                output_file.write('\n')

    def _toConll2006(self, taggedTokens):
        """
        Build the CoNLL lines of a single sentence.

        Each line holds six tab-separated columns: the 1-based position of
        the token in the sentence, the word, a literal '_' placeholder, and
        the values returned by get_cpostag(), get_postag() and get_feats()
        of the token's info object.

        @param taggedTokens: A list of tuples (word,L{TokenDependencyInfo})
        of a sentence
        @return: A String list in CoNLL 2006 format
        """
        conll_lines = []
        # Token identifiers start at 1 for every sentence.
        for identifier, token in enumerate(taggedTokens, start=1):
            word, info = token[0], token[1]
            columns = (
                str(identifier),
                word,
                '_',
                info.get_cpostag(),
                info.get_postag(),
                info.get_feats(),
            )
            conll_lines.append('\t'.join(columns) + '\n')
        return conll_lines