#-*- coding: utf-8 -*-
'''
@author: David Vilares Calvo
'''

import os
import codecs
import tempfile
from miopia.util.ConfigurationManager import ConfigurationManager
from miopia.parser.SentimentDependencyGraph import SentimentDependencyGraph
from miopia.preparator.TextPreparator import TextPreparator
from miopia.parser.TokenDependencyInfo import TokenDependencyInfo

class Parser(object):
    
    __lang = 'es' 
    """
    Tools for interacting with MaltParser 1.7* and obtaining L{SentimentDependencyGraph}
    """
    def __init__(self, lang = 'es'):
        """
        Builds a parser for the given language.

        @param lang: Language code handed to L{ConfigurationManager} when the
        parser settings are looked up in L{parse_tagged_file}. It is 'es' by default.
        """
        self.__lang = lang
        # Writes the tagged sentences to the file given to the parser
        # (see L{parse} and L{parse_to_file}).
        self._preparator = TextPreparator()
        

    def parse_from_conll(self,file_path):
        """
        Loads an already parsed CoNLL 2006 file as dependency graphs.

        @param file_path: A path to a CoNLL 2006 file
        @return: A list of L{SentimentDependencyGraph}, one for each sentence
        that L{_simplify} returns for the file.
        """
        #TODO: Better simplify in parse_to_file, not here
        graphs = []
        for simplified_sentence in self._simplify(file_path):
            graphs.append(SentimentDependencyGraph(simplified_sentence))
        return graphs
 
    
    
    def parse_dir_to_file(self,dir_path, list_of_tagged_sentences, 
                          input_path="/tmp/parse_dir_to_file_unparsed.conll",
                          output_path="/tmp/parse_dir_to_file_parsed.conll"):
        """
        It parses the sentences of a whole collection of texts in a single parser
        run and then writes the parsed sentences of each text to its own file.

        @param dir_path: The directory where one file per text_id is written with
        the parsed sentences of that text.
        @param list_of_tagged_sentences: A list of (text_id,[[(token,L{INfoTag})]]).
        @param input_path: The path to the file where all the sentences are written
        before parsing them. It is handed to L{parse_to_file} as its auxiliary file,
        which removes it once the parser has finished.
        @param output_path: The path to the file where the parser writes the parsed
        sentences of all the texts. It is not removed.
        """
        sentences_per_text = [] #(text_id, number of sentences of the text)
        dir_tagged_sentences = []
        for text_id, tagged_sentences in list_of_tagged_sentences:
            # A text made of a single empty sentence gets an empty output file,
            # although that empty sentence is still handed over to the parser.
            if tagged_sentences == [[]]:
                sentences_per_text.append((text_id,0))
            else:
                sentences_per_text.append((text_id,len(tagged_sentences)))
            dir_tagged_sentences.extend(tagged_sentences)
        self.parse_to_file(output_path, dir_tagged_sentences, input_path)

        # Parsed sentences are separated by a blank line in the output file.
        with open(output_path) as parsed_file:
            sentences = parsed_file.read().split('\n\n')

        i = 0
        for text_id, number_sentences in sentences_per_text:
            if number_sentences == 0:
                content = ''
            else:
                content = '\n\n'.join(sentences[i:i+number_sentences])+'\n\n'
            # 'with' closes every per-text file explicitly instead of relying
            # on the garbage collector to do it.
            with open(dir_path+os.sep+text_id,"w") as text_file:
                text_file.write(content)
            i += number_sentences
    
    
    
    def parse_to_file(self,output_path,tagged_sentences,aux_path=None):
        """
        Parses the tagged sentences and leaves the parser output in output_path.

        @param output_path: The destination file.
        @param tagged_sentences: [tagged_sentence] where tagged_sentence is a [(token,L{INfoTag})]. 
        Use L{LexicalProcessor} to obtain them.
        @param aux_path: The path to an auxiliary file where the sentences are written
        before parsing them. A temporary file is created if it is None. The auxiliary
        file is removed before returning, also when it was provided by the caller.
        """
        if aux_path is None:
            # mkstemp returns an open descriptor: it is closed right away so that
            # it is not leaked, only the path is needed.
            descriptor, aux_path = tempfile.mkstemp()
            os.close(descriptor)
        try:
            self._preparator.prepare(aux_path,tagged_sentences)
            self.parse_tagged_file(aux_path, output_path)
        finally:
            # The auxiliary file is removed also when preparing or parsing fails.
            if os.path.exists(aux_path):
                os.unlink(aux_path)


    def parse_tagged_file(self, tagged_file_name, output_file_name):
        """
        Runs 'java -jar <path_maltparser> -c <maltparser_model> -i ... -o ... -m parse'
        from the directory of the model, using the settings that
        L{ConfigurationManager} holds for the language of this parser.

        @param tagged_file_name: The path to the file to be parsed ('-i' option).
        @param output_file_name: The path to the file where the parser output is
        written ('-o' option).
        @note: The command is run after changing to the 'path_maltparser_model'
        directory, so relative paths are resolved against that directory.
        @note: The exit status of the command is not checked.
        """
        c = ConfigurationManager(lang=self.__lang)
        original_dir = os.getcwd()
        os.chdir(c.getParameter("path_maltparser_model"))
        try:
            os.system("java -jar "+c.getParameter("path_maltparser")+" -c "+c.getParameter("maltparser_model")+
                      " -i "+tagged_file_name+" -o "+output_file_name+" -m parse")
        finally:
            # The working directory of the process is always restored, also when
            # building or running the command raises.
            os.chdir(original_dir)


    def parse_dir(self, list_of_tagged_sentences):
        """
        Parses the sentences of several texts with a single call to L{parse} and
        groups the results by text.

        @param list_of_tagged_sentences: A list of (text_id,[tagged_sentence]).
        @return: A list of (text_id,[graph]) in the same order as the input. Each
        text gets as many consecutive results of L{parse} as sentences it has.
        """
        all_tagged_sentences = []
        sentence_counts = [] #(text_id, number of sentences of the text)
        for text_id, tagged_sentences in list_of_tagged_sentences:
            sentence_counts.append((text_id,len(tagged_sentences)))
            all_tagged_sentences.extend(tagged_sentences)

        graphs = self.parse(all_tagged_sentences)

        grouped = []
        start = 0
        for text_id, count in sentence_counts:
            end = start+count
            grouped.append((text_id, graphs[start:end]))
            start = end
        return grouped
    
    def parse(self,tagged_sentences,temp_input=None,temp_output=None):
        """
        Parses the tagged sentences and builds a graph for each parsed sentence.

        @param tagged_sentences: [tagged_sentence] where tagged_sentence is a [(token,L{INfoTag})]. 
        Use L{LexicalProcessor} to obtain them.
        @param temp_input: Temporal file to save the unparsed text. A temporary
        file is created if it is None.
        @param temp_output: Temporal file to save the parsed text. A temporary
        file is created if it is None.
        @return: A [L{SentimentDependencyGraph}]
        @note: Both temporal files are removed before returning, also when they
        were provided by the caller.
        """
        # mkstemp returns an open descriptor: it is closed right away so that it
        # is not leaked, only the path is needed.
        if temp_input is None:
            descriptor, temp_input = tempfile.mkstemp()
            os.close(descriptor)

        if temp_output is None:
            descriptor, temp_output = tempfile.mkstemp()
            os.close(descriptor)

        try:
            self._preparator.prepare(temp_input,tagged_sentences)
            self.parse_tagged_file(temp_input, temp_output)
            sentences = self._simplify(temp_output)
        finally:
            # The temporal files are removed also when any of the steps fails.
            for temp_path in (temp_input, temp_output):
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
        return [SentimentDependencyGraph(s) for s in sentences]
    
    

    def _simplify(self,parsed_file):    
        """
        Simplifies a CoNLL 2006 file. The output is used to build instances of L{SentimentDependencyGraph}
        @param parsed_file: A path to a CoNLL 2006 file. It is read as UTF-8.
        @return: A list of strings, one for each sentence of the file, as they are
        built by L{_format} after applying L{_reorganize}: one line per token with
        its FORM, POSTAG, HEAD and DEPREL columns separated by tabs.
        """
        # 'with' closes the file also if reading it fails.
        with codecs.open(parsed_file,encoding="utf-8") as co:
            lines = co.readlines()

        sentences = []
        sentence = {} #ID of the token: L{TokenDependencyInfo} of the token
        for l in lines:
            # A blank line ends the current sentence. Lines holding only
            # whitespace (e.g. '\r\n') are blank lines too instead of tokens.
            if len(l) > 1 and l.strip():
                columns = l.split('\t')
                # Columns used: 0 is ID, 1 is FORM, 4 is POSTAG, 6 is HEAD and 7 is DEPREL
                t = TokenDependencyInfo(columns[1],columns[4],int(columns[6]),columns[7])
                sentence[int(columns[0])] = t
            else:
                # Empty sentences are kept, so consecutive blank lines yield
                # one empty sentence each.
                sentences.append(sentence)
                sentence = {}
        # The last sentence may not be followed by a blank line.
        if sentence != {}:
            sentences.append(sentence)
        return   self._format(self._reorganize(sentences))
        


    def _right_brothers(self,sentence,identifier):
        """
        @param sentence: An adversative sentence, as a dictionary of tokens where
        the ID of the token is the key
        @param identifier: ID of adversative clause
        @return: A list with the ID's of the tokens which have the same head as
        the adversative clause and an ID greater than or equal to the ID of the
        clause, so the clause itself is included in the list
        """
        father = sentence[identifier].get_head()
        return [key for key in sentence.keys()
                if sentence[key].get_head() == father and key >= identifier]

  
  
    def _reorganize(self,sentences): 
        """
        Reorganizes the output_parsed CoNLL 2006 file to simplify the subordinating sentences.
        Symbolic urls, emoticons and reorganizable adversative clauses are rewritten
        in place by L{_reorganize_symbolic_url}, L{_reorganize_emoticon} and
        L{_reorganize_adversative}.

        @param sentences: A list of dictionaries. Each dictionary is a sentence in CoNLL 2006
        representation. ID is the key and a L{TokenDependencyInfo} is the value.
        @return: The same list, whose dictionaries have been modified in place.
        """  
        for sentence in sentences:
            # The keys are copied before iterating: L{_reorganize_adversative}
            # adds an artificial node to the sentence, and a dictionary can not
            # change its size while its keys view is being iterated (Python 3).
            # The artificial nodes are not visited.
            for key in list(sentence.keys()):
                
                if self._is_symbolic_url(sentence[key]):
                    sentence = self._reorganize_symbolic_url(sentence, key)
                if self._is_emoticon(sentence[key]):
                    sentence = self._reorganize_emoticon(sentence, key)      
                if self._is_reorganizable_adversative(sentence[key]):
                    sentence = self._reorganize_adversative(sentence,key) 
        return sentences
    
    
    def _is_symbolic_url(self,token):
        """
        @param token: A L{TokenDependencyInfo} instance
        @return: True if the form of the token is 'SymbolicURL', False otherwise
        """
        form = token.get_form()
        return form == 'SymbolicURL'
    
    
    def _reorganize_symbolic_url(self,sentence,key):
        """
        Marks a symbolic url token with an artificial dependency type
        ('art_rel_symbolicurl') and an artificial fine tag ('symbolicurl:').

        @precondition: The L{TokenDependencyInfo} sentence[key] must be a symbolic url
        @param sentence: A dictionary of L{TokenDependencyInfo}. Represents a sentence
        in CoNLL-2006. ID column is the key.
        @param key: ID of the symbolic url token
        @return: The same dictionary, where the symbolic url token has been modified
        """
        url_token = sentence[key]
        url_token.set_deprel("art_rel_symbolicurl")
        url_token.set_finetag("symbolicurl:")
        return sentence
        
    
    
    def _is_emoticon(self,token):
        """
        @param token: A L{TokenDependencyInfo} instance
        @return: True if the form of the token is one of 'Emoticon-Negative',
        'Emoticon-Positive', 'Extremely-Emoticon-Positive',
        'Extremely-Emoticon-Negative' or 'Neutral', False otherwise
        """
        emoticon_forms = frozenset(('Emoticon-Negative',
                                    'Emoticon-Positive',
                                    'Extremely-Emoticon-Positive',
                                    'Extremely-Emoticon-Negative',
                                    'Neutral'))
        form = token.get_form()
        return form in emoticon_forms
    
    
    
    def _reorganize_emoticon(self,sentence,key):
        """
        Marks an emoticon token with an artificial dependency type
        ('art_rel_emoticon') and an artificial fine tag ('emoticon:').

        @precondition: The L{TokenDependencyInfo} sentence[key] must be an emoticon
        @param sentence: A dictionary of L{TokenDependencyInfo}. Represents a sentence
        in CoNLL-2006. ID column is the key.
        @param key: ID of the emoticon token
        @return: The same dictionary, where the emoticon token has been modified
        """
        emoticon_token = sentence[key]
        emoticon_token.set_deprel("art_rel_emoticon")
        emoticon_token.set_finetag("emoticon:")
        return sentence



    def _is_reorganizable_adversative(self,token):
        """
        @param token: A L{TokenDependencyInfo} instance
        @return: True if token can be reorganized, False otherwise. A token can be
        reorganized if it is a coordinating conjunction (fine tag
        'c:postype_coordinating') whose form is 'pero', 'sino', 'mientras',
        'mientras_que' or 'sino_que', its head is not 0 and its dependency
        type is 'coord'
        """
        if token.get_finetag() != "c:postype_coordinating":
            return False
        if token.get_form() not in ('pero','sino','mientras','mientras_que','sino_que'):
            return False
        if token.get_head() == 0:
            return False
        return token.get_deprel() == "coord"
     


    def _reorganize_adversative(self,sentence,key):
        """
        Adds an artificial node with form '[]' to the sentence. The new node takes
        the head which the head of the adversative clause had. After that, the head
        of the clause, the clause itself and its right brothers (see
        L{_right_brothers}) are attached to the artificial node.

        @precondition: Adversative clause must be reorganizable
        @param sentence: A dictionary of a sentence in CoNLL 2006. ID is the key and
        a L{TokenDependencyInfo} is the value.
        @param key: ID of an adversative clause
        @return: The same dictionary, with the artificial node stored under the
        ID len(sentence)+1
        """    
        clause = sentence[key]
        head_id = clause.get_head()
        head_token = sentence[head_id]
        new_id = len(sentence)+1

        # The fine tag saves the type of the adversative and the ID of the clause
        finetag = "art_adversative:"+self._type_of_adversative(clause.get_form())+"@"+str(key)
        sentence[new_id] = TokenDependencyInfo("[]",finetag,
                                               head_token.get_head(),"art_rel_adversative")

        head_token.set_head(new_id)
        for brother in self._right_brothers(sentence,key):
            sentence[brother].set_head(new_id)
        return sentence
 
    
    def _type_of_adversative(self,form):
        """
        @precondition: form must be in {'pero','sino','mientras','mientras_que','sino_que'}
        @param form: An adversative clause 
        @return: 'restrict' if form is 'pero', 'mientras' or 'mientras_que',
        'exclude' for any other form
        """
        restrictive_forms = ('pero','mientras','mientras_que')
        return 'restrict' if form in restrictive_forms else 'exclude'
            

    def _format(self,sentences):
        """
        Prepares a text to get a DependencyGraph instance

        @param sentences: A list of dictionaries. Each dictionary is a sentence
        where the ID of the token is the key and the token is the value.
        @return: A list of strings, one for each sentence. Each string has a line
        per token with its form, fine tag, head and dependency type separated by
        tabs. The tokens follow the iteration order of the dictionary.
        """
        formatted_sentences = []
        for sentence in sentences:
            rows = []
            for key in sentence.keys():
                token = sentence[key]
                rows.append(token.get_form()+'\t'+token.get_finetag()
                            +'\t'+str(token.get_head())+'\t'+token.get_deprel()+'\n')
            formatted_sentences.append("".join(rows))
        return formatted_sentences

class MaltParser(Parser):
     
    """
    MaltParser Wrapper. The parser binary and the model are given explicitly
    to the constructor.
    """
    def __init__(self, parser_bin, model_dir, model_name):
        """
        Constructor

        @param parser_bin: The path to the jar of MaltParser (run with 'java -jar').
        @param model_dir: The directory from which the parser is run.
        @param model_name: The name of the model (the '-c' option of the parser).
        """
        self._preparator = TextPreparator()
        self._parser_bin = parser_bin
        self._model_dir = model_dir
        self._model_name = model_name
        

    def parse_tagged_file(self, tagged_file_name, output_file_name):
        """
        Runs 'java -jar <parser_bin> -c <model_name> -i ... -o ... -m parse' from
        the directory of the model.

        @param tagged_file_name: The path to the file to be parsed ('-i' option).
        @param output_file_name: The path to the file where the parser output is
        written ('-o' option).
        @note: The command is run after changing to the model directory, so
        relative paths are resolved against that directory.
        @note: The exit status of the command is not checked.
        """
        command = "java -jar %s -c %s -i %s -o %s -m parse"%(
                             self._parser_bin, self._model_name, 
                             tagged_file_name, output_file_name)
        original_dir = os.getcwd()
        os.chdir(self._model_dir)
        try:
            os.system(command)
        finally:
            # The working directory of the process is always restored, also
            # when running the command raises.
            os.chdir(original_dir)


class ZparParser(Parser):
    
    """
    Zpar wrapper. The parser binary and the model are given explicitly to the
    constructor.
    """
    def __init__(self, parser_bin, model_path):
        """
        Constructor

        @param parser_bin: The path to the Zpar executable.
        @param model_path: The path to the model, passed to the executable as its
        last argument.
        """
        self._preparator = TextPreparator()
        self._parser_bin = parser_bin
        self._model_path = model_path

    def parse_tagged_file(self, tagged_file_name, output_file_name):
        """
        Runs '<parser_bin> -c <tagged_file_name> <output_file_name> <model_path>'.

        @param tagged_file_name: The path to the file to be parsed.
        @param output_file_name: The path to the file given to the executable as
        its output.
        @note: The exit status of the command is not checked.
        """
        arguments = (self._parser_bin,tagged_file_name, output_file_name, self._model_path)
        command = "%s -c %s %s %s"%arguments
        os.system(command)
