1 '''
2 Created on 04/02/2013
3
4 @author: David Vilares
5 '''
6
7 from miopia.classifier.ClassificationStrategy import ClassificationStrategy
8 import codecs
9 import time
10
11
12
17
26
36
38
39 CHI_SQUARED= "weka.attributeSelection.ChiSquaredAttributeEval"
40
43
46
47
57
58
63
65
66 RANKER = "weka.attributeSelection.Ranker"
67
68 - def __init__(self, threshold = 0., num_to_select = -1):
72
74 return self.RANKER+" -T "+str(self._threshold)+" -N "+str(self._num_to_select)
75
76
78
79 RERANKINGSEARCH = "weka.attributeSelection.RerankingSearch"
80
def __init__(self, b=20, information_based_evaluator=0,
             rerank_method=1,
             search_algorithm="weka.attributeSelection.GreedyStepwise"):
    """
    Keep the reranking-search settings on the instance.

    @param b: Stored as self._b; used as the -blockSize value when the
    WEKA option string is built.
    @param information_based_evaluator: Stored as
    self._information_based_evaluator; used as the -method value.
    @param rerank_method: Stored as self._rerank_method; used as the
    -rankingMeasure value.
    @param search_algorithm: A string. Stored as self._search_algorithm;
    used as the -search value. Defaults to WEKA's GreedyStepwise class name.
    """
    # Paired assignments; attributes are still set in the original order.
    self._b, self._information_based_evaluator = b, information_based_evaluator
    self._rerank_method, self._search_algorithm = rerank_method, search_algorithm
87
89 return (self.RERANKINGSEARCH+" -method "+str(self._information_based_evaluator)
90 +" -blockSize "+str(self._b)+" -rankingMeasure "+str(self._rerank_method)
91 +" -search "+self._search_algorithm)
92
93
94
96 SMO = "weka.classifiers.functions.SMO"
97 NAIVE_BAYES = "weka.classifiers.bayes.NaiveBayes"
98 J48 = "weka.classifiers.trees.J48"
99 META_CLASSIFIER = "weka.classifiers.meta.AttributeSelectedClassifier"
100 LIBLINEAR = "weka.classifiers.functions.LibLINEAR"
101
102
104 '''
105 classdocs
106 '''
107 UNKWOWN_CLASS_SYMBOL = '?'
108
110 '''
111 @param path_weka: A string. The path to the WEKA.jar (and additional JARs, depending on the selected classifier).
112 @param model: A string. A path to a trained model. None if no trained model provided.
113 '''
114 self._model = model
115 if path_weka is None: self._path_weka = ''
116 else: self._path_weka = path_weka
117
118
def train(self, output_model, output_file, arff_training_file,
          arff_development_file=None):
    """
    Record the model path and run the training helper.

    @param output_model: A string. The path where the trained model will be stored.
    @param output_file: A string. The path where the training results will be printed.
    @param arff_training_file: A string. The path to the training ARFF file.
    @param arff_development_file: A string. The path to the development ARFF file.
    None if there is no development file.
    """
    # The model path is set before the helper runs, exactly as before,
    # in case the helper reads self._model.
    self._model = output_model
    helper_args = (arff_training_file, arff_development_file, output_file)
    self._train_model(*helper_args)
131
132
133
def classify(self, arff_file, results_file, dict_position_instanceid):
    """
    Classify the instances of an ARFF file and pair each result with its id.

    @param arff_file: A string. The path to the ARFF file to be classified.
    @param results_file: A string. The path where the WEKA classifications will be printed.
    @param dict_position_instanceid: A dictionary {position_in_arff: file_id} that
    relates the position of each instance in the ARFF DATA section to its
    corresponding text id.
    @return: A list of tuples (instance_id, label, value), one per line returned by
    _get_model_classifications and in the same order. label is the text after
    the ':' in the third whitespace-separated field; value is the fourth field
    (presumably the prediction confidence -- confirm against the WEKA output format).
    """
    classifications = []
    lines = self._get_model_classifications(arff_file, results_file)
    # enumerate() yields the real position of every line. The former
    # lines.index(line) lookup was quadratic and always returned the first
    # occurrence, so repeated output lines were mapped to the wrong id.
    for position, line in enumerate(lines):
        fields = line.split()  # split once instead of three times per line
        classifications.append((dict_position_instanceid[position],
                                fields[2].split(":")[1],
                                fields[3]))
    return classifications
147