# -*- coding: utf-8 -*-
'''
Created on 01/10/2014

@author: david
'''

#!/usr/bin/python
# -*- coding: utf-8 -*-

import nltk
import pickle

from miopia.preprocessor.PreProcessor import PreProcessor
from miopia.preparator.LexicalProcessor import LexicalProcessor
from miopia.preparator.TextPreparator import TextPreparator
from miopia.parser.Parser import Parser
from miopia.analyzer.Dictionary import Dictionary
from miopia.util.ConfigurationManager import ConfigurationManager
from miopia.classifier.SimpleClassifier import SimpleClassifier
from miopia.adapter.CompositeAdapter import CompositeAdapter
from miopia.adapter.NGramAdapter import NGramAdapter
from miopia.analyzer.counter.NGramCounter import NGramCounter
from miopia.analyzer.counter.CompositeAuxiliaryCounter import CompositeAuxiliaryCounter
from miopia.analyzer.BackOff import BackOff
from miopia.classifier.WekaClassificationStrategy import ClassifierWeka
from miopia.classifier.MetaStrategy import MetaStrategy
from miopia.classifier.WekaClassificationStrategy import RankerSearchMethod
from miopia.classifier.WekaClassificationStrategy import InformationGainAttributeEvaluator
from miopia.adapter.Feature import FeatureTypeConfiguration
from miopia.adapter.Feature import FeatureLevelBackOff
from miopia.adapter.Adapter import Adapter
from miopia.util.TextInfo import TextInfo
from nltk.tokenize.punkt import PunktWordTokenizer
from miopia.tagger.PerceptronTwitTagger import PerceptronTwitTagger
from miopia.analyzer.PsychometricDictionary import PsychometricDictionary

import os
import codecs


def _miopia_header_value(header_line, title):
    """Return the value that follows the title of a miopia header line.

    A header line is expected to look like ``TITLE<whitespace>VALUE``.
    When the line starts with *title* (compared without surrounding
    whitespace) that title is removed as a real prefix; otherwise the first
    whitespace-delimited token of the line is treated as its title and
    dropped. An empty string is returned when nothing follows the title.
    """
    content = header_line.strip()
    key = title.strip()
    if key and content.startswith(key):
        rest = content[len(key):]
        # Only accept the match when the title ends at a token boundary, so
        # that a title which is a mere prefix of another title is not
        # mistaken for it.
        if not rest or rest[0].isspace():
            return rest.strip()
    fields = content.split(None, 1)
    return fields[1].strip() if len(fields) > 1 else ""


def miopia_file_to_text_info(path_miopia_file,
                             base_path_parsed_files):
    """Read a miopia corpus file and build one TextInfo per data line.

    Layout read by this function:

    * line 0: header whose value is the base directory of the parsed files
    * line 1: header whose value is the base directory of the metadata files
    * line 2: ignored (``DATA`` in the demo corpora of this script)
    * line 3 onwards: one text per line, five tab-separated fields

    For every data line the fields are handed to ``TextInfo`` as
    ``TextInfo(f[0], f[2], graphs, metadata_dir + f[4], f[1])``. ``graphs``
    is ``[(dg, 0), ...]`` built from ``Parser.parse_from_conll`` on
    ``parsed_dir + f[3]``, or an empty list when the parsed-files base
    directory is empty.
    NOTE(review): in the demo data the columns look like file id, category,
    text, parsed-file name and metadata-file name -- confirm against TextInfo.

    Args:
        path_miopia_file: path of the miopia file to read.
        base_path_parsed_files: title that prefixes the header lines (the
            script passes ``BASE_PATH_PARSED_FILES_TITLE``).

    Returns:
        A list with the TextInfo objects, in file order.

    Raises:
        IndexError: if the file has fewer than two header lines.
    """
    p = Parser()
    # Close the handle deterministically instead of leaking it.
    with codecs.open(path_miopia_file) as miopia_file:
        lines = miopia_file.readlines()

    # str.strip(chars) removes a *set of characters* from both ends, not a
    # prefix, so it cannot be used to drop the header title: it may leave
    # part of the title behind or eat characters of the path itself.
    base_parsed_dir = _miopia_header_value(lines[0], base_path_parsed_files)
    base_metadata_dir = _miopia_header_value(lines[1], base_path_parsed_files)

    list_text_info = []
    for line_text in lines[3:]:
        ls = line_text.strip('\n').split('\t')
        # Same output as the Python 2 statement ``print ls, len(ls)``.
        print("%s %s" % (ls, len(ls)))
        if len(ls) != 5:
            print(line_text)
            print("UNEXPECTED LENGTH OF LINE")
        if len(ls) < 5:
            # Too few fields (e.g. a blank line): indexing below would raise
            # IndexError, so the line is skipped after the warning above.
            continue
        if base_parsed_dir != "":
            graphs = [(dg, 0)
                      for dg in p.parse_from_conll(base_parsed_dir + ls[3])]
        else:
            graphs = []
        list_text_info.append(TextInfo(ls[0], ls[2], graphs,
                                       base_metadata_dir + ls[4], ls[1]))
    return list_text_info
          
############################################################################
# Demo configuration: Weka classpath, working files and the two toy corpora
cm = ConfigurationManager(lang='en')

# The three Weka-related configuration entries are joined with ':'.
PATH_WEKA = ":".join(cm.getParameter(key) for key in ('path_weka',
                                                      'path_weka_liblinear',
                                                      'path_weka_liblinear2'))

# Files produced while training and classifying.
PATH_RANKING_FILE_FEATURES = "/tmp/ranking_SENTIMENT.PSYCHOMETRIC"
PATH_TRAINING_ARFF = "/tmp/training.arff"
PATH_TEST_ARFF = "/tmp/test.arff"
PATH_OUTPUT_RESULTS = "/tmp/output.arff"
PATH_WEKA_MODEL = '/tmp/classifier_model'
PATH_WEKA_RESULTS = "/tmp/test.results"
PATH_QREL_RESULTS = "/tmp/test.qrel"

# Corpora in miopia format, written to disk later in the script.
PATH_TRAINING_MIOPIA_FILE = "/tmp/training_set.miopia"
PATH_TEST_MIOPIA_FILE = "/tmp/test_set.miopia"

# First header line of a miopia file: title followed by the base directory of
# the parsed files (empty in this demo).
BASE_PATH_PARSED_FILES_TITLE = "BASE_PATH_PARSED_FILES "
BASE_PATH_PARSED_FILES = ""
head = "".join([BASE_PATH_PARSED_FILES_TITLE, BASE_PATH_PARSED_FILES])

# Each data line holds five tab-separated fields. The training corpus uses
# the labels P and N.
content_training = head + (
    "\n"
    "BASE_PATH_METADATA_FILES    \n"
    "DATA\n"
    "file1\tP\tI like you\t_\t_\n"
    "file2\tP\tThe camera of that mobile is amazing\t_\t_\n"
    "file3\tN\tThree people died in an accident\t_\t_\n"
)

# Same texts as the training corpus, with '?' in the label column.
content_test = head + (
    "\n"
    "BASE_PATH_METADATA_FILES    \n"
    "DATA\n"
    "file1\t?\tI like you\t_\t_\n"
    "file2\t?\tThe camera of that mobile is amazing\t_\t_\n"
    "file3\t?\tThree people died in an accident\t_\t_\n"
)

############################################################################
# NLP resources needed to process the texts
# Composite-word and abbreviation dictionaries, read from the TSV files named
# in the configuration (same reading order as before: composite words first).
cw_dict, ab_dict = (cm.readTsvDict(cm.getParameter(name))
                    for name in ('path_composedwords', 'path_abbreviations'))
preprocessor = PreProcessor(lang="en",
                            abbreviations=ab_dict,
                            composite_words=cw_dict)
preparator = TextPreparator()

sentence_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
tokenizer = PunktWordTokenizer()
tagger = PerceptronTwitTagger()
parser = Parser()

# Dictionary() built with its defaults creates the resources for Spanish, so
# the psychometric dictionary and the lemmas are loaded explicitly here.
p_dict = PsychometricDictionary()
p_dict.readFromFile(cm.getParameter("path_lwic_dictionary"), encoding="utf-8")

dictionary = Dictionary(
    p_dict=p_dict,
    lemmas=cm.readLemmaDict(cm.getParameter("path_lemmas"), encoding="utf-8"))
lexical_processor = LexicalProcessor(sentence_tokenizer, tokenizer, tagger)

############################################################################
# Feature counters and their adapters
def _unigram_counter_and_adapter(back_off_type):
    """Build the unigram counter for *back_off_type* and its adapter.

    The counter shares the module-level preprocessor, lexical processor and
    dictionary; every call gets its own FeatureTypeConfiguration, BackOff and
    empty set. The adapter is created with Adapter.BINARY_WEIGHTING_FACTOR.

    Returns:
        A ``(counter, adapter)`` tuple.
    """
    configuration = FeatureTypeConfiguration(
        n_gram_back_off='-'.join([back_off_type]),
        n_gram=1)
    counter = NGramCounter(configuration, preprocessor, lexical_processor,
                           BackOff(dictionary), set([]))
    adapter = NGramAdapter(PATH_WEKA, counter, Adapter.BINARY_WEIGHTING_FACTOR)
    return counter, adapter


psychometric_counter, psychometric_adapter = _unigram_counter_and_adapter(
    FeatureLevelBackOff.TYPE_BACK_OFF_PSYCHOMETRIC)
word_counter, word_adapter = _unigram_counter_and_adapter(
    FeatureLevelBackOff.TYPE_BACK_OFF_WORD)
postag_counter, postag_adapter = _unigram_counter_and_adapter(
    FeatureLevelBackOff.TYPE_BACK_OFF_FINE_TAG)

# The composite adapter receives the three adapters, in this order.
composite_counter = CompositeAuxiliaryCounter(FeatureTypeConfiguration())
composite_adapter = CompositeAdapter(PATH_WEKA, composite_counter)
for _adapter in (psychometric_adapter, word_adapter, postag_adapter):
    composite_adapter.add(_adapter)
############################################################################
# Creating the training and the test file (miopia format needed)
# Both corpora are written inside ``with`` blocks so that they are flushed and
# closed before miopia_file_to_text_info() re-opens them by path: content that
# is still sitting in the write buffer is not visible to a second handle, and
# the corpus would be read back as an empty file.
with codecs.open(PATH_TRAINING_MIOPIA_FILE, "w") as content_file:
    content_file.write(content_training)
with codecs.open(PATH_TEST_MIOPIA_FILE, "w") as content_test_file:
    content_test_file.write(content_test)

# Turn the training corpus into an ARFF file and keep its header, which is
# reused when the test ARFF file is generated.
composite_adapter.to_arff(miopia_file_to_text_info(PATH_TRAINING_MIOPIA_FILE,
                                                   BASE_PATH_PARSED_FILES_TITLE),
                          PATH_TRAINING_ARFF)
arff_header = composite_adapter.arff_header_from_arff_file(PATH_TRAINING_ARFF)

############################################################################
# Create the strategy and the classifier to train and evaluate it
evaluator = InformationGainAttributeEvaluator()
# NOTE(review): the meaning of the 0 passed to RankerSearchMethod is not
# visible from this script -- confirm against WekaClassificationStrategy.
search_method = RankerSearchMethod(0)
strategy = MetaStrategy(evaluator, search_method, ClassifierWeka.NAIVE_BAYES, PATH_WEKA)

strategy.train(PATH_WEKA_MODEL,
               PATH_OUTPUT_RESULTS,
               PATH_TRAINING_ARFF)

# The test corpus is re-opened by path just below, so its writer handle has to
# be closed first; otherwise the buffered content may not be on disk yet.
# Closing a file that is already closed is a harmless no-op.
content_test_file.close()
d_position_id = composite_adapter.to_arff(
    miopia_file_to_text_info(PATH_TEST_MIOPIA_FILE,
                             BASE_PATH_PARSED_FILES_TITLE),
    PATH_TEST_ARFF, arff_header, True)

classifier = SimpleClassifier(strategy)
l = classifier.classify(PATH_TEST_ARFF, PATH_WEKA_RESULTS, d_position_id)
# Single-argument print calls produce the same output as the Python 2 print
# statements they replace.
print("Classifications for the test set (FILEID,CLASS,CONFIDENCE)")
print(l)
classifier.to_key_value_format(l, PATH_QREL_RESULTS)