'''
Created on 29/10/2013

@author: David Vilares Calvo
'''
import os
import re
from miopia.classifier.WekaClassificationStrategy import WekaClassificationStrategy
from miopia.classifier.WekaClassificationStrategy import ClassifierWeka
from miopia.util.ConfigurationManager import ConfigurationManager
from miopia.adapter.Feature import FeatureTypeConfiguration
from collections import OrderedDict

class MetaStrategy(WekaClassificationStrategy):
    '''
    A wrapper for the meta classifier AttributeSelectedClassifier provided by the WEKA framework.

    Training and classification are performed by building a "java ..." command
    line and running it through the system shell. The text WEKA prints while
    training can afterwards be parsed with L{meta_information}.
    '''
    # Value appended to the JVM "-Xmx" flag of every java command. It is read
    # from the project configuration once, when the class is defined.
    # Presumably a string such as "1500M" -- TODO confirm against the configuration file.
    ALLOWED_JAVA_HEAP_XMX = ConfigurationManager().getParameter("allowed_java_heap")

    def __init__(self, evaluator=None, search_method=None,
                 classifier_weka=None, path_weka=None,
                 results_file="/tmp/outputMeta.txt", cl_params="-s 4 -no-cv ", model=None):
        '''
        @param evaluator: An instance of L{miopia.classifier.WekaClassificationStrategy.AttributeEvaluator}
        @param search_method: An instance of L{miopia.classifier.WekaClassificationStrategy.SearchMethod}
        @param classifier_weka: A element of L{ClassifierWekaPath} or an instance of L{miopia.classifier.MetaStrategy}
        @param path_weka: A path to the weka.jar. None if weka is in your classpath
        @param results_file: Accepted for backward compatibility only; this class never reads it.
        @param cl_params: A string. It specifies the configuration of the WEKA classifier. See WEKA command line options for
        a detailed explanation.
        @param model: A path to a trained model (SOMETHING.model). If None, a model must
        be trained using train() method.
        '''
        # The attributes are set before delegating to the parent constructor,
        # exactly as the original code did.
        self._evaluator = evaluator
        self._search_method = search_method
        self._classifier = classifier_weka
        self._cl_params = cl_params
        # NOTE: meta information is not loaded here, even when a model path is
        # given; call meta_information() explicitly to fill this dictionary.
        self._dict_information = {}
        super(MetaStrategy, self).__init__(path_weka, model)

    def get_dict_information(self):
        '''
        @return: The dictionary filled by the last call to L{meta_information}
        (empty if that method has not been called yet).
        '''
        return self._dict_information

    def get_evaluator(self):
        '''
        @return: The attribute evaluator given to the constructor.
        '''
        return self._evaluator

    def get_search_method(self):
        '''
        @return: The search method given to the constructor.
        '''
        return self._search_method

    def get_base_classifier_configuration(self):
        '''
        @return: The trailing command line fragment that configures the wrapped
        classifier. When the wrapped classifier is itself a L{MetaStrategy}, the
        fragment carries its evaluator, search method and base classifier path;
        otherwise a single blank is returned.
        '''
        if not isinstance(self._classifier, MetaStrategy):
            return ' '

        nested = self._classifier
        return (" -- -E " + "\"" + str(nested.get_evaluator()) + "\""
                + " -S " + "\"" + str(nested.get_search_method()) + "\""
                + " -W " + nested.get_base_classifier_path() + " -- ")

    def get_base_classifier_path(self):
        '''
        @return: The value passed to the "-W" option: L{ClassifierWeka.META_CLASSIFIER}
        when the wrapped classifier is another L{MetaStrategy}, otherwise the
        wrapped classifier object itself, unchanged.
        '''
        if isinstance(self._classifier, MetaStrategy):
            return ClassifierWeka.META_CLASSIFIER
        return self._classifier

    def _train_model(self, arff_training_file, arff_development_file,
                     output_file):
        '''
        Builds the WEKA training command, prints it and runs it in the system shell.

        @param arff_training_file: A path to the ARFF file passed with "-t".
        @param arff_development_file: A path to the ARFF file passed with "-T",
        or None to omit that option.
        @param output_file: A path. The standard output of the java process is
        redirected to this file.
        @note: The exit status of the command is not inspected.
        '''
        if arff_development_file is not None:
            dev_param = " -T " + arff_development_file
        else:
            dev_param = " "

        # Example of a generated command:
        #java -Xmx1500M -cp "/home/mij/wekafiles/packages/LibLINEAR/LibLINEAR.jar:/opt/weka-3-7-10/weka.jar:/home/mij/wekafiles/packages/LibLINEAR/lib/liblinear-1.92.jar" weka.classifiers.meta.AttributeSelectedClassifier -E "weka.attributeSelection.InfoGainAttributeEval" -S "weka.attributeSelection.Ranker -T 0 -N -1" -W weka.classifiers.functions.LibLINEAR -i -k -t /tmp/Training_1_W-LibLINEAR-train-TW-_-S_0_.arff  -d /tmp/models/save/1_W-LibLINEAR-train-TW-_-S_0_.model  -x 5 -- -S 0

        exec_string = ("java -Xmx" + self.ALLOWED_JAVA_HEAP_XMX
                       + " -cp \"" + self._path_weka + "\" " + ClassifierWeka.META_CLASSIFIER
                       + " -E " + "\"" + str(self._evaluator) + "\""
                       + " -S " + "\"" + str(self._search_method) + "\""
                       + " -W " + self.get_base_classifier_path()
                       + " -i -k"
                       + " -t " + arff_training_file + dev_param + " -d " + self._model
                       + " " + self._cl_params
                       + self.get_base_classifier_configuration()
                       + " > " + output_file)

        # Single-argument form: prints the same text under Python 2 and Python 3.
        print("EXECUTING " + exec_string)

        # NOTE(review): file paths are interpolated into the shell command
        # without quoting, so paths containing blanks will break it.
        os.system(exec_string)

    def _get_model_classifications(self, arff_file, results_file):
        '''
        Runs the saved model on an ARFF file and reads the predictions back.

        @param arff_file: A path to the ARFF file passed with "-T".
        @param results_file: A path. WEKA is asked to write its plain text
        predictions to this file, which is then read.
        @return: A list with the lines of results_file, excluding the first
        and the last one.
        @note: The exit status of the command is not inspected, so a failed run
        surfaces only when results_file cannot be opened.
        '''
        # The classpath is quoted here as it is in _train_model, so that a
        # classpath which works for training also works for classification.
        exec_string = ("java -Xmx" + self.ALLOWED_JAVA_HEAP_XMX
                       + " -cp \"" + self._path_weka + "\" " + ClassifierWeka.META_CLASSIFIER
                       + " -classifications \"weka.classifiers.evaluation.output.prediction.PlainText -file " + results_file
                       + " -decimals 15 -suppress \" "
                       + " -l " + self._model + " -T " + arff_file)
        os.system(exec_string)

        # The context manager closes the file even if reading fails.
        with open(results_file, "r") as predictions:
            lines = predictions.readlines()
        return lines[1:-1]


    # The helpers below are disabled and kept only for reference.

#    def _get_features(self,lines_features):   
#        """
#        @param ranking_file:  A path to an existing ranking file
#        @precondition: ranking_file must be in the format provided by the WEKA selection attribute tools
#        @return A dictionary of features given a ranking file provided by WEKA attribute selection tools
#        """
#
#        dict_features = OrderedDict()
#
#        lines_attributes = [l.split()[1] for l in lines_features]
#        for key_atr in lines_attributes: 
#            #We only select attributes with a positive information gain        
#            feature_type, ftc, name = self._get_feature_configuration(key_atr) 
#            try: 
#                dict_features[feature_type, ftc].append(name) 
#            except KeyError:
#                dict_features[feature_type, ftc] = [name]   
#        return dict_features
#    


#    def _get_feature_configuration(self,str_name_feature):
#        """
#        Given a long name of a feature it returns the L{FeatureType}
#        and the L{FeatureTypeConfiguration}
#        """
#            
#        aux = str_name_feature.rsplit(FeatureTypeConfiguration.DELIMITER_CONFIGURATION)
#        back_off_head = None
#        back_off_dependent = None
#        n_gram = None
#        feature_type_configuration = aux[1:(len(aux)-1)]
#        kwargs = {}
#        for configuration_element in feature_type_configuration:
#                
#            if configuration_element.startswith(FeatureTypeConfiguration.HEAD_BACK_OFF_DELIMITER+"="):
#                back_off_head = configuration_element[len(FeatureTypeConfiguration.HEAD_BACK_OFF_DELIMITER+"="):]
#
#            if configuration_element.startswith(FeatureTypeConfiguration.DEPENDENT_BACK_OFF_DELIMITER+"="):
#                back_off_dependent = configuration_element[len(FeatureTypeConfiguration.DEPENDENT_BACK_OFF_DELIMITER+"="):]
#
#            if configuration_element.startswith(FeatureTypeConfiguration.N_GRAM_DELIMITER+"="):
#                n_gram = configuration_element[len(FeatureTypeConfiguration.N_GRAM_DELIMITER+"="):]
#                kwargs['n_gram'] = n_gram
#                
#            if configuration_element.startswith(FeatureTypeConfiguration.N_GRAM_BACK_OFF_DELIMITER+"="):
#                n_gram_back_off = configuration_element[len(FeatureTypeConfiguration.N_GRAM_BACK_OFF_DELIMITER+"="):]
#                kwargs['n_gram_back_off'] = n_gram_back_off
#                    
#            if configuration_element.startswith(FeatureTypeConfiguration.ADD_DEPENDENCY_TYPE_DELIMITER+"="):
#                add_dependency_type = configuration_element[len(FeatureTypeConfiguration.ADD_DEPENDENCY_TYPE_DELIMITER+"="):]
#                kwargs['add_dependency_type'] = True if add_dependency_type == 'True' else False
#
#            if configuration_element.startswith(FeatureTypeConfiguration.SEMANTIC_PROPERTY_DELIMITER+"="):
#                semantic_property = configuration_element[len(FeatureTypeConfiguration.SEMANTIC_PROPERTY_DELIMITER+"="):]
#                kwargs['semantic_property'] = semantic_property               
#                
#        feature_type_configuration = FeatureTypeConfiguration(back_off_head,
#                                                              back_off_dependent,
#                                                              **{str(k): v for k, v in kwargs.items()})
#                   
#        return aux[0],feature_type_configuration,aux[len(aux)-1]


#    def _get_classes(self, match_header_classes):
#        return match_header_classes.split()[2].replace('{','').replace('}','').split(',')


    def meta_information(self, meta_information_file):
        """
        Parses the text WEKA printed while training and stores the result.

        @param meta_information_file: A string. The path to output file provided by WEKA after training
        the model.
        @return A dictionary with meta information about the classifier. Every
        value is a list with one entry per occurrence found in the file:
         - 'evaluator': the line following each 'Attribute Evaluator' line, tabs removed.
         - 'search_method': the line following each 'Search Method:' line, tabs removed.
         - 'search_method_threshold': floats read from 'Threshold for discarding attributes' lines.
         - 'number_attributes': integers read after the second ':' of 'Selected attributes:' lines.
        The same dictionary is later available through get_dict_information().
        """
        # The context manager closes the file even if reading fails.
        with open(meta_information_file) as meta_file:
            meta_information_string = meta_file.read()
        print("Entra meta information")

        match_number_attributes = re.findall('Selected attributes:.*', meta_information_string)
        match_evaluator = re.findall('Attribute Evaluator.*\n.*\n', meta_information_string)
        match_search_method = re.findall('Search Method:.*\n.*\n', meta_information_string)
        match_threshold_discarding_attributes = re.findall('Threshold for discarding attributes.*',
                                                           meta_information_string)

        # The reduced data header and the class attribute line are no longer
        # extracted: their values were never used, and indexing them raised
        # IndexError on output that lacked those sections.

        # Each value depends on the second line of the match: the first line
        # is the section title.
        self._dict_information['evaluator'] = [
            match.split('\n')[1].replace('\t', '') for match in match_evaluator]
        self._dict_information['search_method'] = [
            match.split('\n')[1].replace('\t', '') for match in match_search_method]
        self._dict_information['search_method_threshold'] = [
            float(match.split(":")[1]) for match in match_threshold_discarding_attributes]
        self._dict_information['number_attributes'] = [
            int(match.split(':')[2]) for match in match_number_attributes]

        return self._dict_information



    
