'''
Created on 10/04/2013

@author: David Vilares
'''

import os
import codecs
from miopia.adapter.Feature import FeatureTypeConfiguration
from miopia.analyzer.counter.Counter import Counter
from miopia.util.ConfigurationManager import ConfigurationManager
from miopia.adapter.Feature import FeatureType
from collections import defaultdict

class FeatureInfo(object):
    """
    This class provides information about a feature for a supervised classifier
    """
    
    def __init__(self,feature,ranking,information_gain,feature_type):
        """
        @param feature: The identifier of the feature. A string
        @param ranking: The ranking of the feature with respect to the others. A float
        @param information_gain: A float indicating the information gain provided by the feature
        @param feature_type: A constant of L{FeatureType} which represents the type of the feature
        """
        self._feature = feature
        self._ranking = ranking
        self._information_gain = information_gain
        # Store the argument itself; the builtin 'type' was being stored here by mistake
        self._feature_type = feature_type
        
    def get_feature(self):
        """
        @return: A string with the feature
        """
        return self._feature
    
    def get_ranking(self):
        """
        @return: A float with the ranking of the feature
        """
        return self._ranking
    
    def get_feature_type(self):
        """
        @return: A constant of L{FeatureType} which represents the type of the feature
        """
        return self._feature_type
    
    def get_information_gain(self):
        """
        @return: A float with the information gain provided by the feature
        """
        # Read the stored attribute; calling the getter again would recurse forever
        return self._information_gain



class Adapter(object):
    #TODO: Any adapter can read any adapter ranking file , is this good?
    '''
    Adapter is an abstract class which defines the interface shared by the different
    feature adapters provided in the adapter package
    '''

    BINARY_WEIGHTING_FACTOR = "BW"
    TOTAL_WEIGHTING_FACTOR = "TW" 

    DELIMITER_FEATURE_TYPE_AND_LIST_FEATURES = ":"
    ALLOWED_JAVA_HEAP_XMX = ConfigurationManager().getParameter("allowed_java_heap")


    def __init__(self,path_weka, counter,
                 weight_factor = BINARY_WEIGHTING_FACTOR):
        '''
        Constructor
        @param path_weka: The path to the WEKA.jar. None is stored as an empty string
        @param counter: An instance of L{Counter}
        @param weight_factor: A value {BINARY_WEIGHTING_FACTOR, TOTAL_WEIGHTING_FACTOR}
        '''
        self._weighting_factor = weight_factor
        # A missing path is normalised to '' so the attribute is always a string
        self._path_weka = '' if path_weka is None else path_weka
        self._counter = counter

        #
        
        
    def get_counter(self):
        """
        @return: The counter object that was given to the constructor
        """
        counter = self._counter
        return counter

    def get_weighting_factor(self):
        """
        @return: The weighting factor that was given to the constructor
        """
        weighting_factor = self._weighting_factor
        return weighting_factor
    
#    def _get_feature_type(self):
#        """
#        The type of the feature
#        """
#        return FeatureType.GENERIC
    

    def count_features(self,list_text_info):
        """
        Counts the features of the given texts and re-keys them with a long identifier.
        @param list_text_info: A list of L{TextInfo}
        @return: A dictionary whose keys are tuples (feature type as a string,
        feature type configuration as a string, feature key returned by the counter)
        and whose values are the values reported by the counter for that feature
        """
        counted = self._counter.count(list_text_info)
        configuration_name = str(self._counter.get_feature_type_configuration())

        long_title_features = {}
        for feature, value in counted.items():
            # NOTE(review): _get_feature_type is not defined in the visible part of this
            # class; presumably subclasses provide it
            long_key = (str(self._get_feature_type()), configuration_name, feature)
            long_title_features[long_key] = value
        return long_title_features
    
    
    
    def classes_from_arff(self,arff_file):
        """
        Reads the class labels declared in the header of an ARFF file.
        @param arff_file: The path to an arff file
        @return: A list of strings with the classes considered in that arff. They are
        taken from the third whitespace-separated token of the last header line before '@DATA'
        """
        # The context manager releases the file handle even if reading fails
        with codecs.open(arff_file,"r") as farff:
            header = farff.read().split('@DATA')[0]

        header_lines = header.split('\n')
        # Assumes a newline separates the class attribute from '@DATA', which makes the
        # class attribute the second to last element after splitting
        line_classes = header_lines[len(header_lines)-2]
        return line_classes.split()[2].replace('{','').replace('}','').split(',')
   
        


    def _long_id_feature_to_string(self,expanded_id):
        #TODO: Change Counter()
        """
        Flattens a long feature identifier into a single string.
        @param expanded_id: An identifier to know the location of a feature inside a file and a L{SentimentDependencyGraph}.
        Its first two elements are concatenated as they are and the third one is
        translated into a name by the counter
        @return: The concatenation of the first element, the second element and the name
        the counter gives to the third element
        @todo: Change this in the future
        """
        prefix = expanded_id[0]+expanded_id[1]
        feature_name = self._counter.name_from_id(expanded_id[2])
        return prefix+feature_name


    def to_arff(self,list_text_info,arff_file, arff_header=None, is_test_set = False):
        """
        Writes the features of a collection of texts to a file in the ARFF format.
        @param list_text_info: A list of L{TextInfo}
        @param arff_file: A string. The path to the destination of the data represented in the ARFF format.
        @param arff_header: A string containing an arff header, which will indicate the features
        that will be taken into account. None if no header specified, in which case the header
        is built from the features and the categories found in list_text_info.
        @param is_test_set: A boolean. True if it's a test set. False otherwise.
        @return: A dictionary mapping the position of each instance inside the ARFF data
        to the identifier of its text
        """
        # The context manager closes the destination file even if a later step raises
        with codecs.open(arff_file,"w") as farff:
            dict_features = self._proccess(list_text_info, is_test_set)
            if arff_header is None:
                # It's necessary to calculate the header
                classes = set(text_info.get_category() for text_info in list_text_info)
                arff_header = self._arff_header(dict_features,classes)
            farff.write(arff_header)
            # The header is pushed to disk before the data section is computed
            farff.flush()
            dict_feature_position = self._features_from_arff_header(arff_header)
            arff_data, dict_position_instanceid = self._arff_data(dict_features,
                                                                  dict_feature_position,
                                                                  list_text_info)
            farff.write(arff_data)
        return dict_position_instanceid


    
    def arff_header_from_arff_file(self, arff_file):
        """
        Extracts the header of an existing ARFF file.
        @param arff_file: The path to an ARFF file
        @return: A string. The ARFF header of the arff_file: everything up to and including
        the '@DATA' marker plus the character that follows it
        @raise ValueError: If the file does not contain the '@DATA' marker
        """
        # The context manager releases the file handle even if reading fails
        with codecs.open(arff_file) as farff:
            arff_file_str = farff.read()

        ending_index = arff_file_str.find('@DATA')
        if ending_index == -1:
            # Without this check the slice below would silently return the first five characters
            raise ValueError("No '@DATA' section found in ARFF file: %s" % arff_file)
        # 6 = len('@DATA') plus the character right after it (expected to be a newline)
        return arff_file_str[0:ending_index+6]



    def _proccess(self, list_text_info, is_test_set):
        """
        Obtains the features that will be written to the ARFF file.
        @param list_text_info: A list of L{TextInfo}
        @param is_test_set: A boolean. It is not used by this implementation
        @return: The dictionary returned by L{count_features}
        """
        return self.count_features(list_text_info)


    def _features_from_arff_header(self,arff_header):
        """
        Indexes the attributes declared in an ARFF header.
        @param arff_header: A string. An ARFF header
        @return: A dictionary of the features considered in the ARFF header with their
        corresponding position in the file. The position is zero-based and follows
        the order of the attribute lines.
        """
        # The first line (relation) and the last two elements of the split are skipped;
        # everything in between is treated as an attribute line
        attribute_lines = arff_header.split('\n')[1:-2]
        positions = {}
        for index, attribute_line in enumerate(attribute_lines):
            quoted_name = attribute_line.split()[1]
            # Drop the first and last character (the surrounding quotes) and undo the escaping of inner quotes
            name = quoted_name[1:-1].replace("\\'","'")
            positions[name] = index
        return positions
        

    def _arff_header(self,dict_features, classes):
        """
        Builds the ARFF header that declares one attribute per distinct feature plus the class.
        @param dict_features: The dictionary of features provided by the count method of L{Counter}.
        @param classes: The classes considered by the classifier. An iterable of strings.
        @return: A string. The ARFF header, ending with the '@DATA' line.
        """
        header_lines = ["@RELATION Polarity\n"]
        # A set gives constant-time duplicate detection; the declaration order is kept by header_lines
        seen_features = set()

        for feature in dict_features:
            str_feature = self._long_id_feature_to_string(feature)
            if str_feature in seen_features:
                continue
            seen_features.add(str_feature)
            # The configuration is only parsed for features that are actually declared
            _, ftc, _ = self._get_feature_configuration(str_feature)
            # Single quotes inside the name are escaped because the name itself is quoted
            header_lines.append("@ATTRIBUTE '"+str_feature.encode('utf-8').replace("'","\\'")+"' "+ftc.get_weka_data_type()+"\n")

        header_lines.append("@ATTRIBUTE 'class' {"+','.join(classes)+"}\n")
        header_lines.append("@DATA\n")
        return "".join(header_lines)

    def get_weighting_value(self,str_ftc,value):
        """
        Applies the weighting factor of the adapter to a value.
        @param str_ftc: A string. The representation of an instance of L{FeatureTypeConfiguration}.
        It is not used by this implementation.
        @param value: An integer.
        @return: 1 under BINARY_WEIGHTING_FACTOR, the value itself under TOTAL_WEIGHTING_FACTOR
        and None for any other weighting factor
        """
        factor = self._weighting_factor
        if factor == self.BINARY_WEIGHTING_FACTOR:
            weighted = 1
        elif factor == self.TOTAL_WEIGHTING_FACTOR:
            weighted = value
        else:
            # Unknown weighting factors yield None
            weighted = None
        return weighted

    def _arff_data(self, dict_features, dict_feature_position, list_text_info):
        """
        Builds the data section of the ARFF file, one line per text.
        @param dict_features: The dictionary of features provided by L{count_features}. Each key is
        indexed as [0] (feature type) and [2] (feature identifier) and each value is the count.
        @param dict_feature_position: A dictionary mapping the name of each attribute to its
        position in the ARFF header. It must contain the 'class' attribute.
        @param list_text_info: A list of L{TextInfo}
        @return: A tuple (arff_data, dict_instanceid_position). arff_data is a string with one line
        per text, in braces, made of comma-separated 'position value' pairs that end with the class.
        dict_instanceid_position maps the zero-based position of each line to the identifier of its text.
        """
        WEKA_RESERVED_WORDS = ['class']
        dict_features_file = defaultdict(dict)

        # First pass: group the weighted feature values by the text they belong to
        for feature, value in dict_features.items():
            feature_type = feature[0]
            feature_id = feature[2]
            str_feature = self._long_id_feature_to_string(feature)
            if str_feature in WEKA_RESERVED_WORDS:
                continue
            features_of_file = dict_features_file[self._counter.file_id(feature_id)]
            try:
                accumulated = features_of_file[str_feature]+value
            except KeyError:
                # First occurrence of this feature in this text
                accumulated = value
            features_of_file[str_feature] = self.get_weighting_value(feature_type, accumulated)

        # Second pass: one output line per text, attributes sorted by their header position
        data_lines = []
        dict_instanceid_position = {}
        for position, text_info in enumerate(list_text_info):
            textid = text_info.get_textid()
            features_of_text = dict_features_file[textid]
            # Features missing from the header are dropped; membership is tested against the
            # dictionary itself instead of rebuilding a set of its keys for every text
            keys = sorted((key for key in features_of_text if key in dict_feature_position),
                          key = lambda x: dict_feature_position[x])
            entries = [str(dict_feature_position[key])+" "+str(features_of_text[key])
                       for key in keys]
            entries.append(str(dict_feature_position['class'])+" "+text_info.get_category())
            data_lines.append("{"+",".join(entries)+"}\n")
            dict_instanceid_position[position] = textid

        return "".join(data_lines), dict_instanceid_position


    def _get_feature_configuration(self,str_name_feature):
        """
        Given a long name of a feature it returns the L{FeatureType}
        and the L{FeatureTypeConfiguration}
        @param str_name_feature: A string. The long name of a feature, whose parts are
        separated by FeatureTypeConfiguration.DELIMITER_CONFIGURATION
        @return: A tuple (first part of the name, L{FeatureTypeConfiguration} built from the
        middle parts, last part of the name)
        """
        def option_value(element, delimiter):
            # Text that follows "<delimiter>=", or None when the element is not that option
            marker = delimiter+"="
            if not element.startswith(marker):
                return None
            return element[len(marker):]

        chunks = str_name_feature.rsplit(FeatureTypeConfiguration.DELIMITER_CONFIGURATION)
        head_back_off = None
        dependent_back_off = None
        options = {}

        # Only the chunks between the first and the last one carry configuration options
        for element in chunks[1:-1]:

            found = option_value(element, FeatureTypeConfiguration.HEAD_BACK_OFF_DELIMITER)
            if found is not None:
                head_back_off = found

            found = option_value(element, FeatureTypeConfiguration.DEPENDENT_BACK_OFF_DELIMITER)
            if found is not None:
                dependent_back_off = found

            found = option_value(element, FeatureTypeConfiguration.N_GRAM_DELIMITER)
            if found is not None:
                options['n_gram'] = found

            found = option_value(element, FeatureTypeConfiguration.N_GRAM_BACK_OFF_DELIMITER)
            if found is not None:
                options['n_gram_back_off'] = found

            found = option_value(element, FeatureTypeConfiguration.ADD_DEPENDENCY_TYPE_DELIMITER)
            if found is not None:
                # Only the exact text 'True' enables the option
                options['add_dependency_type'] = (found == 'True')

            found = option_value(element, FeatureTypeConfiguration.SEMANTIC_PROPERTY_DELIMITER)
            if found is not None:
                options['semantic_property'] = found

        configuration = FeatureTypeConfiguration(head_back_off,
                                                 dependent_back_off,
                                                 **options)

        return chunks[0],configuration,chunks[-1]