Home | Trees | Indices | Help |
---|
|
'''
Created on 29/10/2013

@author: David Vilares Calvo
'''
import os
import re
from miopia.classifier.WekaClassificationStrategy import WekaClassificationStrategy
from miopia.classifier.WekaClassificationStrategy import ClassifierWeka
from miopia.util.ConfigurationManager import ConfigurationManager
from miopia.adapter.Feature import FeatureTypeConfiguration
from collections import OrderedDict


class MetaStrategy(WekaClassificationStrategy):
    '''
    A wrapper for the meta classifier AttributeSelectedClassifier provided by
    the WEKA framework.

    An instance runs WEKA as an external ``java`` process (attribute selection
    + base classifier), so training and classification shell out via
    ``os.system`` and communicate through ARFF files and plain-text result
    files on disk.
    '''

    # Maximum Java heap size passed as -Xmx to every spawned JVM; read once
    # from the project configuration at class-definition time.
    ALLOWED_JAVA_HEAP_XMX = ConfigurationManager().getParameter("allowed_java_heap")

    def __init__(self, evaluator=None, search_method=None,
                 classifier_weka=None, path_weka=None,
                 results_file="/tmp/outputMeta.txt", cl_params="-s 4 -no-cv ", model=None):
        '''
        @param evaluator: An instance of L{miope.classifier.WekaClassificationStrategy.AttributeEvaluator}
        @param search_method: An instance of L{miope.classifier.WekaClassificationStrategy.SearchMethod}
        @param classifier_weka: An element of L{ClassifierWekaPath} or an
            instance of L{miope.classifier.MetaStrategy} (meta classifiers can
            be nested).
        @param path_weka: A path to the weka.jar. None if weka is in your classpath.
        @param results_file: Default path for the plain-text prediction output.
            NOTE(review): currently not stored on the instance — kept in the
            signature for backward compatibility.
        @param cl_params: A string. It specifies the configuration of the WEKA
            classifier. See WEKA command line options for a detailed explanation.
        @param model: A path to a trained model (SOMETHING.model). If None, a
            model must be trained using the training method.
        '''
        self._evaluator = evaluator
        self._search_method = search_method
        self._classifier = classifier_weka
        self._cl_params = cl_params
        # Filled lazily by meta_information(); maps meta-data keys to lists.
        self._dict_information = {}
        # Superclass stores the weka.jar path (self._path_weka) and the model
        # path (self._model) used by the command lines below.
        super(MetaStrategy, self).__init__(path_weka, model)

    def get_evaluator(self):
        """
        @return: The configured WEKA attribute evaluator.
        """
        return self._evaluator

    def get_search_method(self):
        """
        @return: The configured WEKA attribute-search method.
        """
        return self._search_method

    def get_base_classifier_configuration(self):
        """
        @return: The command-line fragment configuring a *nested* meta
            classifier (its evaluator, search method and base classifier after
            a '--' separator), or a single space when the wrapped classifier is
            a plain WEKA classifier and needs no extra configuration.
        """
        if isinstance(self._classifier, MetaStrategy):
            return (
                " -- -E " + "\"" + str(self._classifier.get_evaluator()) + "\""
                + " -S " + "\"" + str(self._classifier.get_search_method()) + "\""
                + " -W " + self._classifier.get_base_classifier_path() + " -- ")
        return ' '

    def get_base_classifier_path(self):
        """
        @return: The WEKA class name of the classifier run under attribute
            selection: the meta-classifier class itself when nesting another
            L{MetaStrategy}, otherwise the wrapped classifier path verbatim.
        """
        if isinstance(self._classifier, MetaStrategy):
            return ClassifierWeka.META_CLASSIFIER
        return self._classifier

    # NOTE(review): the original method header was lost in the rendered page;
    # name and parameter order reconstructed from the body — confirm against
    # callers before relying on the exact signature.
    def train(self, arff_training_file, output_file, arff_development_file=None):
        """
        Train the AttributeSelectedClassifier on an ARFF file, saving the model
        to self._model and WEKA's textual report to output_file.

        @param arff_training_file: Path to the training ARFF file.
        @param output_file: Path where WEKA's stdout report is redirected.
        @param arff_development_file: Optional ARFF file passed to WEKA as the
            test set (-T); omitted when None.
        """
        if arff_development_file is not None:
            dev_param = " -T " + arff_development_file
        else:
            dev_param = " "

        # Example of the generated command:
        # java -Xmx1500M -cp "...weka.jar:..." weka.classifiers.meta.AttributeSelectedClassifier
        #   -E "weka.attributeSelection.InfoGainAttributeEval"
        #   -S "weka.attributeSelection.Ranker -T 0 -N -1"
        #   -W weka.classifiers.functions.LibLINEAR -i -k -t train.arff -d out.model -x 5 -- -S 0
        exec_string = ("java -Xmx" + self.ALLOWED_JAVA_HEAP_XMX
                       + " -cp \"" + self._path_weka + "\" "
                       + ClassifierWeka.META_CLASSIFIER
                       + " -E " + "\"" + str(self._evaluator) + "\""
                       + " -S " + "\"" + str(self._search_method) + "\""
                       + " -W " + self.get_base_classifier_path()
                       + " -i -k"
                       + " -t " + arff_training_file + dev_param + " -d " + self._model
                       + " " + self._cl_params
                       + self.get_base_classifier_configuration()
                       + " > " + output_file)

        print("EXECUTING " + exec_string)

        # SECURITY: the command is built by string concatenation and run
        # through a shell; file paths containing shell metacharacters would be
        # interpreted.  Prefer subprocess with an argument list if the paths
        # can ever come from untrusted input.
        os.system(exec_string)

    # NOTE(review): method header lost in the rendered page; name reconstructed
    # from the class contract (prediction step) — confirm against callers.
    def classify(self, arff_file, results_file="/tmp/outputMeta.txt"):
        """
        Classify the instances of an ARFF file with the trained model.

        @param arff_file: Path to the ARFF file to classify.
        @param results_file: Path where WEKA writes its plain-text predictions.
        @return: The prediction lines (WEKA's PlainText output minus the header
            line and the trailing blank line).
        """
        # SECURITY: same shell-injection caveat as in training — see above.
        os.system("java -Xmx" + self.ALLOWED_JAVA_HEAP_XMX + " -cp " + self._path_weka
                  + " " + ClassifierWeka.META_CLASSIFIER
                  + " -classifications \"weka.classifiers.evaluation.output.prediction.PlainText "
                    "-file " + results_file
                  + " -decimals 15 -suppress \" "
                  + " -l " + self._model + " -T " + arff_file)

        # 'with' guarantees the handle is closed (the original leaked it on
        # exceptions between open() and close()).
        with open(results_file, "r") as f:
            lines = f.readlines()
        # Drop WEKA's header line and the trailing blank line.
        return lines[1:-1]

    def meta_information(self, meta_information_file):
        """
        Parse the report WEKA prints after training into a dictionary.

        @param meta_information_file: A string. The path to the output file
            provided by WEKA after training the model.
        @return: A dictionary with meta information about the classifier:
            'evaluator', 'search_method', 'search_method_threshold' (floats)
            and 'number_attributes' (ints), one entry per attribute-selection
            run found in the report.
        """
        with open(meta_information_file) as f:
            meta_information_string = f.read()

        match_number_attributes = re.findall(r'Selected attributes:.*',
                                             meta_information_string)
        match_evaluator = re.findall('Attribute Evaluator.*\n.*\n',
                                     meta_information_string)
        match_search_method = re.findall('Search Method:.*\n.*\n',
                                         meta_information_string)
        match_threshold_discarding_attributes = re.findall(
            r'Threshold for discarding attributes.*', meta_information_string)

        # Reset all buckets so repeated calls do not accumulate stale entries.
        for meta_data in ['evaluator', 'search_method',
                          'search_method_threshold', 'number_attributes']:
            self._dict_information[meta_data] = []

        # The useful value sits on the second line of the two-line matches,
        # indented with tabs in WEKA's report.
        for match in match_evaluator:
            self._dict_information['evaluator'].append(
                match.split('\n')[1].replace('\t', ''))
        for match in match_search_method:
            self._dict_information['search_method'].append(
                match.split('\n')[1].replace('\t', ''))
        for match in match_threshold_discarding_attributes:
            self._dict_information['search_method_threshold'].append(
                float(match.split(":")[1]))
        for match in match_number_attributes:
            # "Selected attributes: 1,2,5 : 3" -> the count after the 2nd ':'.
            self._dict_information['number_attributes'].append(
                int(match.rsplit(':')[2]))

        return self._dict_information
Home | Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Wed Oct 15 10:03:40 2014 | http://epydoc.sourceforge.net |