'''
Created on 04/02/2013

@author: David Vilares
'''

from miopia.classifier.ClassificationStrategy import ClassificationStrategy
import codecs
import time



class AttributeEvaluator(object):
    """
    Common base type of the attribute evaluator descriptors in this module.

    It keeps no state of its own; the subclasses return the name of a WEKA
    class from __str__.
    """

    def __init__(self):
        """Nothing to set up at this level."""
   
class CfsSubsetEvalAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator descriptor whose string form is the name of the
    WEKA CfsSubsetEval class.
    """

    # Fully qualified name of the WEKA class returned by __str__.
    CFSSUBSETEVAL = "weka.attributeSelection.CfsSubsetEval"

    def __init__(self):
        """Chain to the AttributeEvaluator initialiser; no extra state is kept."""
        super(CfsSubsetEvalAttributeEvaluator, self).__init__()

    def __str__(self):
        """Return the WEKA class name held in CFSSUBSETEVAL."""
        weka_class_name = self.CFSSUBSETEVAL
        return weka_class_name
        
class InformationGainAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator descriptor whose string form is the name of the
    WEKA InfoGainAttributeEval class.
    """

    # Fully qualified name of the WEKA class returned by __str__.
    INFORMATION_GAIN = "weka.attributeSelection.InfoGainAttributeEval"

    def __init__(self):
        """Chain to the AttributeEvaluator initialiser; no extra state is kept."""
        super(InformationGainAttributeEvaluator, self).__init__()

    def __str__(self):
        """Return the WEKA class name held in INFORMATION_GAIN."""
        weka_class_name = self.INFORMATION_GAIN
        return weka_class_name
    
class ChiSquaredAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator descriptor whose string form is the name of the
    WEKA ChiSquaredAttributeEval class.
    """

    # Fully qualified name of the WEKA class returned by __str__.
    CHI_SQUARED = "weka.attributeSelection.ChiSquaredAttributeEval"

    def __init__(self):
        """Chain to the AttributeEvaluator initialiser; no extra state is kept."""
        super(ChiSquaredAttributeEvaluator, self).__init__()

    def __str__(self):
        """Return the WEKA class name held in CHI_SQUARED."""
        weka_class_name = self.CHI_SQUARED
        return weka_class_name
    
    
class PrincipalComponentsAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator descriptor whose string form is the name of the
    WEKA PrincipalComponents class.

    NOTE(review): variance_covered is stored on the instance but is not part
    of the string returned by __str__ -- confirm whether callers read
    _variance_covered directly or whether it was meant to be emitted as an
    option.
    """

    # Fully qualified name of the WEKA class returned by __str__.
    PRINCIPAL_COMPONENTS = "weka.attributeSelection.PrincipalComponents"

    def __init__(self, variance_covered=0.95):
        """
        @param variance_covered: Stored unchanged in _variance_covered (0.95 by default).
        """
        super(PrincipalComponentsAttributeEvaluator, self).__init__()
        self._variance_covered = variance_covered

    def __str__(self):
        """Return the WEKA class name held in PRINCIPAL_COMPONENTS."""
        weka_class_name = self.PRINCIPAL_COMPONENTS
        return weka_class_name
        
        
class SearchMethod(object):
    """
    Common base type of the search method descriptors in this module.

    It keeps no state of its own; the subclasses build a WEKA class name
    followed by its options in __str__.
    """

    def __init__(self):
        """Nothing to set up at this level."""

class RankerSearchMethod(SearchMethod):
    """
    Search method descriptor whose string form is the WEKA Ranker class name
    followed by its -T and -N options.
    """

    # Fully qualified name of the WEKA class that opens the string form.
    RANKER = "weka.attributeSelection.Ranker"

    def __init__(self, threshold=0., num_to_select=-1):
        """
        @param threshold: Value emitted after the -T option (0. by default).
        @param num_to_select: Value emitted after the -N option (-1 by default).
        """
        super(RankerSearchMethod, self).__init__()
        self._num_to_select = num_to_select
        self._threshold = threshold

    def __str__(self):
        """Return '<RANKER> -T <threshold> -N <num_to_select>'."""
        pieces = [self.RANKER,
                  "-T", str(self._threshold),
                  "-N", str(self._num_to_select)]
        return " ".join(pieces)


class RerankingSearch(SearchMethod):
    """
    Search method descriptor whose string form is the WEKA RerankingSearch
    class name followed by its -method, -blockSize, -rankingMeasure and
    -search options.

    NOTE(review): information_based_evaluator is emitted after -method and
    rerank_method after -rankingMeasure; check this pairing against the WEKA
    RerankingSearch option documentation -- TODO confirm.
    """

    # Fully qualified name of the WEKA class that opens the string form.
    RERANKINGSEARCH = "weka.attributeSelection.RerankingSearch"

    def __init__(self, b=20, information_based_evaluator=0,
                 rerank_method=1, search_algorithm="weka.attributeSelection.GreedyStepwise"):
        """
        @param b: Value emitted after the -blockSize option (20 by default).
        @param information_based_evaluator: Value emitted after the -method option (0 by default).
        @param rerank_method: Value emitted after the -rankingMeasure option (1 by default).
        @param search_algorithm: A string. It is appended unchanged after the -search option.
        """
        # Chain to the base initialiser, as RankerSearchMethod does, so that
        # any set-up added to SearchMethod later also reaches this class.
        super(RerankingSearch, self).__init__()
        self._b = b
        self._information_based_evaluator = information_based_evaluator
        self._rerank_method = rerank_method
        self._search_algorithm = search_algorithm

    def __str__(self):
        """
        @return: A string with the WEKA class name and the four options, in the
        order -method, -blockSize, -rankingMeasure, -search.
        """
        return (self.RERANKINGSEARCH+" -method "+str(self._information_based_evaluator)
                +" -blockSize "+str(self._b)+" -rankingMeasure "+str(self._rerank_method)
                +" -search "+self._search_algorithm)



class ClassifierWeka(object):
    """Namespace of fully qualified WEKA classifier class names."""

    # Classes under weka.classifiers.functions
    SMO = "weka.classifiers.functions.SMO"
    LIBLINEAR = "weka.classifiers.functions.LibLINEAR"

    # Classes under weka.classifiers.bayes and weka.classifiers.trees
    NAIVE_BAYES = "weka.classifiers.bayes.NaiveBayes"
    J48 = "weka.classifiers.trees.J48"

    # Class under weka.classifiers.meta
    META_CLASSIFIER = "weka.classifiers.meta.AttributeSelectedClassifier"


class WekaClassificationStrategy(ClassificationStrategy):
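    '''
    Classification strategy that relies on WEKA. It keeps the path to the
    WEKA jar(s) and the path to the model that is trained or used.
    '''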
    UNKWOWN_CLASS_SYMBOL = '?'
            
    def __init__(self, path_weka, model):
        '''
        @param path_weka: A string. The path to the WEKA.jar (and additional jar's depending on the selected classifier
        @param model: A string. A path to a trained model. None if no trained model provided.
        '''
        self._model = model
        if path_weka is None: self._path_weka = ''
        else: self._path_weka = path_weka


    def train(self,output_model, output_file, arff_training_file,
              arff_development_file=None ):
        """
        @param output_model: A string. The path where will be stored the trained model.
        @param output_file: A string. The path where will be printed the training results.
        @param arff_training_file: A string. The path to the training ARFF file.
        @param arff_development_file: A string. The path to the development ARFF file. None if there is no development file. 
        """
        #self._to_arff(list_linguistic_info_category, dict_adapted_features, arff_training_file)
        self._model = output_model
        self._train_model(arff_training_file, arff_development_file,
                          output_file)



    def classify(self,arff_file, results_file, dict_position_instanceid):
        """
        @param arff_file: A string. The path to the ARFF file to be classified
        @param results_file: A string. The path where will be printed the WEKA classifications.
        @para dict_position_instanceid: A dictionary {position_in_arff: file_id}, which contains
        relates the position of each instance in the ARFF DATA file which their corresponding textid.
        """
        classifications = []
        lines= self._get_model_classifications(arff_file, results_file)
        for line in lines:
            classifications.append((dict_position_instanceid[lines.index(line)],
                                   line.split()[2].split(":")[1], line.split()[3]))
        return classifications

