Package miopia :: Package classifier :: Module WekaClassificationStrategy
[hide private]
[frames] | [no frames]

Source Code for Module miopia.classifier.WekaClassificationStrategy

  1  ''' 
  2  Created on 04/02/2013 
  3   
  4  @author: David Vilares 
  5  ''' 
  6   
  7  from miopia.classifier.ClassificationStrategy import ClassificationStrategy 
  8  import codecs 
  9  import time 
 10   
 11   
 12   
class AttributeEvaluator(object):
    """
    Common base type for the WEKA attribute evaluators declared in this
    module. It carries no state and defines no behaviour of its own.
    """

    def __init__(self):
        """Nothing to initialise: the base evaluator keeps no attributes."""
17
class CfsSubsetEvalAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator whose string form is the WEKA class name
    C{weka.attributeSelection.CfsSubsetEval}.
    """

    CFSSUBSETEVAL = "weka.attributeSelection.CfsSubsetEval"

    def __init__(self):
        """Takes no parameters; only the base-class initialisation runs."""
        super(CfsSubsetEvalAttributeEvaluator, self).__init__()

    def __str__(self):
        """
        @return: A string. The fully qualified WEKA class name stored in
        CFSSUBSETEVAL.
        """
        return self.CFSSUBSETEVAL
26
class InformationGainAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator whose string form is the WEKA class name
    C{weka.attributeSelection.InfoGainAttributeEval}.
    """

    INFORMATION_GAIN = "weka.attributeSelection.InfoGainAttributeEval"

    def __init__(self):
        """Takes no parameters; only the base-class initialisation runs."""
        super(InformationGainAttributeEvaluator, self).__init__()

    def __str__(self):
        """
        @return: A string. The fully qualified WEKA class name stored in
        INFORMATION_GAIN.
        """
        return self.INFORMATION_GAIN
36
class ChiSquaredAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator whose string form is the WEKA class name
    C{weka.attributeSelection.ChiSquaredAttributeEval}.
    """

    CHI_SQUARED = "weka.attributeSelection.ChiSquaredAttributeEval"

    def __init__(self):
        """Takes no parameters; only the base-class initialisation runs."""
        super(ChiSquaredAttributeEvaluator, self).__init__()

    def __str__(self):
        """
        @return: A string. The fully qualified WEKA class name stored in
        CHI_SQUARED.
        """
        return self.CHI_SQUARED
46 47
class PrincipalComponentsAttributeEvaluator(AttributeEvaluator):
    """
    Attribute evaluator whose string form is the WEKA class name
    C{weka.attributeSelection.PrincipalComponents}.
    """

    PRINCIPAL_COMPONENTS = "weka.attributeSelection.PrincipalComponents"

    def __init__(self, variance_covered=0.95):
        """
        @param variance_covered: Value kept in C{self._variance_covered}
        (0.95 by default).
        """
        super(PrincipalComponentsAttributeEvaluator, self).__init__()
        # NOTE(review): this value is stored but __str__ below does not
        # include it in the returned string - confirm whether that is intended.
        self._variance_covered = variance_covered

    def __str__(self):
        """
        @return: A string. The fully qualified WEKA class name stored in
        PRINCIPAL_COMPONENTS, without any option.
        """
        return self.PRINCIPAL_COMPONENTS
57 58
class SearchMethod(object):
    """
    Common base type for the WEKA search methods declared in this module.
    It carries no state and defines no behaviour of its own.
    """

    def __init__(self):
        """Nothing to initialise: the base search method keeps no attributes."""
63
class RankerSearchMethod(SearchMethod):
    """
    Search method rendered as the WEKA class name
    C{weka.attributeSelection.Ranker} followed by its -T and -N options.
    """

    RANKER = "weka.attributeSelection.Ranker"

    def __init__(self, threshold=0., num_to_select=-1):
        """
        @param threshold: Value printed after the -T option (0. by default).
        @param num_to_select: Value printed after the -N option (-1 by default).
        """
        super(RankerSearchMethod, self).__init__()
        self._threshold = threshold
        self._num_to_select = num_to_select

    def __str__(self):
        """
        @return: A string with the form "<RANKER> -T <threshold> -N <num_to_select>".
        """
        return "%s -T %s -N %s" % (self.RANKER,
                                   str(self._threshold),
                                   str(self._num_to_select))
75 76
class RerankingSearch(SearchMethod):
    """
    Search method rendered as the WEKA class name
    C{weka.attributeSelection.RerankingSearch} followed by its -method,
    -blockSize, -rankingMeasure and -search options.
    """

    RERANKINGSEARCH = "weka.attributeSelection.RerankingSearch"

    def __init__(self, b=20, information_based_evaluator=0,
                 rerank_method=1, search_algorithm="weka.attributeSelection.GreedyStepwise"):
        """
        @param b: Value printed after the -blockSize option.
        @param information_based_evaluator: Value printed after the -method option.
        @param rerank_method: Value printed after the -rankingMeasure option.
        @param search_algorithm: Value printed after the -search option. Usually
        a string with a WEKA class name; any object is accepted and rendered
        with str().
        """
        # Initialise the base class, as RankerSearchMethod does.
        super(RerankingSearch, self).__init__()
        self._b = b
        self._information_based_evaluator = information_based_evaluator
        self._rerank_method = rerank_method
        self._search_algorithm = search_algorithm

    def __str__(self):
        """
        @return: A string with the form
        "<RERANKINGSEARCH> -method <information_based_evaluator> -blockSize <b>
        -rankingMeasure <rerank_method> -search <search_algorithm>" (single line).
        """
        # NOTE(review): the parameter names suggest that -method and
        # -rankingMeasure might be swapped (information_based_evaluator feeds
        # -method, rerank_method feeds -rankingMeasure). The original pairing
        # is kept here - confirm against the WEKA RerankingSearch options.
        return (self.RERANKINGSEARCH
                + " -method " + str(self._information_based_evaluator)
                + " -blockSize " + str(self._b)
                + " -rankingMeasure " + str(self._rerank_method)
                # str() keeps plain strings untouched and avoids a TypeError
                # when a non-string search object is supplied.
                + " -search " + str(self._search_algorithm))
92 93 94
class ClassifierWeka(object):
    """
    Namespace holding the fully qualified WEKA class names of the
    classifiers referenced by this module.
    """

    # Classifiers under weka.classifiers.functions
    SMO = "weka.classifiers.functions.SMO"
    LIBLINEAR = "weka.classifiers.functions.LibLINEAR"

    # Classifier under weka.classifiers.bayes
    NAIVE_BAYES = "weka.classifiers.bayes.NaiveBayes"

    # Classifier under weka.classifiers.trees
    J48 = "weka.classifiers.trees.J48"

    # Classifier under weka.classifiers.meta
    META_CLASSIFIER = "weka.classifiers.meta.AttributeSelectedClassifier"
101 102
103 -class WekaClassificationStrategy(ClassificationStrategy):
104 ''' 105 classdocs 106 ''' 107 UNKWOWN_CLASS_SYMBOL = '?' 108
109 - def __init__(self, path_weka, model):
110 ''' 111 @param path_weka: A string. The path to the WEKA.jar (and additional jar's depending on the selected classifier 112 @param model: A string. A path to a trained model. None if no trained model provided. 113 ''' 114 self._model = model 115 if path_weka is None: self._path_weka = '' 116 else: self._path_weka = path_weka
117 118
119 - def train(self,output_model, output_file, arff_training_file, 120 arff_development_file=None ):
121 """ 122 @param output_model: A string. The path where will be stored the trained model. 123 @param output_file: A string. The path where will be printed the training results. 124 @param arff_training_file: A string. The path to the training ARFF file. 125 @param arff_development_file: A string. The path to the development ARFF file. None if there is no development file. 126 """ 127 #self._to_arff(list_linguistic_info_category, dict_adapted_features, arff_training_file) 128 self._model = output_model 129 self._train_model(arff_training_file, arff_development_file, 130 output_file)
131 132 133
134 - def classify(self,arff_file, results_file, dict_position_instanceid):
135 """ 136 @param arff_file: A string. The path to the ARFF file to be classified 137 @param results_file: A string. The path where will be printed the WEKA classifications. 138 @para dict_position_instanceid: A dictionary {position_in_arff: file_id}, which contains 139 relates the position of each instance in the ARFF DATA file which their corresponding textid. 140 """ 141 classifications = [] 142 lines= self._get_model_classifications(arff_file, results_file) 143 for line in lines: 144 classifications.append((dict_position_instanceid[lines.index(line)], 145 line.split()[2].split(":")[1], line.split()[3])) 146 return classifications
147