#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
import nltk
import pickle

from collections import defaultdict
from miopia.preprocessor.PreProcessor import PreProcessor
from miopia.preparator.LexicalProcessor import LexicalProcessor
from miopia.preparator.TextPreparator import TextPreparator
from miopia.parser.Parser import Parser
from miopia.analyzer.Dictionary import Dictionary
from miopia.analyzer.SentimentAnalyzer import SentimentAnalyzer
from miopia.analyzer.AnalyzerConfiguration import AnalyzerConfiguration
from miopia.util.ConfigurationManager import ConfigurationManager
from miopia.classifier.SimpleClassifier import SimpleClassifier
from miopia.classifier.TernaryStrategy import TernaryStrategy
from nltk.tokenize.punkt import PunktWordTokenizer
from miopia.classifier.PolarityType import PolarityType
import os

# Directory with the sample parsed files, resolved relative to this script.
# NOTE: an earlier revision pointed at the copy bundled inside the installed egg:
# /usr/local/lib/python2.7/dist-packages/miopia-0.1.0-py2.7.egg/miopia/dataresources/dummy_training_set/
PATH_DUMMY_TRAINING_SET = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                       "dummy_parsed_set", "P")

# Building blocks of the analysis pipeline.
preprocessor = PreProcessor()
preparator = TextPreparator()
sentence_tokenizer = nltk.data.load('tokenizers/punkt/spanish.pickle')
tokenizer = PunktWordTokenizer()

# Load the pickled tagger (spanish_brill.pickle) from the configured directory.
# The file is opened in binary mode, as recommended for pickles, and inside a
# `with` block so the handle is closed as soon as the object is loaded.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
path_tagger = ConfigurationManager().getParameter("path_pickle_taggers")+"spanish_brill.pickle"
with open(path_tagger, 'rb') as tagger_file:
    tagger = pickle.load(tagger_file)

parser = Parser()
dictionary = Dictionary()
lexical_processor = LexicalProcessor(sentence_tokenizer,
                                     tokenizer,
                                     tagger)

# The analyzer is shared by the file-based and the string-based examples below.
sentiment_analyzer = SentimentAnalyzer(parser,
                                       dictionary,
                                       AnalyzerConfiguration(final_sentences_weight=1.),
                                       preprocessor,
                                       lexical_processor)


# We evaluate the test set (in ConLL 2006 format)
list_polarities = [polarity_type for polarity_type in os.listdir(PATH_DUMMY_TRAINING_SET)]
#To classify a list of files
files =[PATH_DUMMY_TRAINING_SET+os.sep+name_file for name_file in os.listdir(PATH_DUMMY_TRAINING_SET)]
for f in files:
    sentiment_info = sentiment_analyzer.analyze_from_conll(f)[1]
    print "The semantic orientation of the file named "+"\""+f+"\" is "+str(sentiment_info.get_so())


#To classify a text from a string
running_example = "Esta máquina es muy barata, pero pésima".decode('utf-8')
lists_graphs,sentiment_info = sentiment_analyzer.analyze(running_example)
for graph in lists_graphs:
    print "The raw representation:",graph.tree().pprint()
    print "The JSON representation:",graph.dg_to_json(0,dictionary)
 
print "-----------------------------------------------"
print "The semantic orientation of the sentence "+"\""+running_example+"\" is "+str(sentiment_info.get_so())
