6 """
7 Maximum Entropy code.
8
9 Uses Improved Iterative Scaling:
10 XXX ref
11
12 # XXX need to define terminology
13
14 """
15
16 import numpy
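
# A minimal usage sketch (is_red, observations and labels are
# hypothetical names; a complete, runnable demonstration lives under
# __main__ at the bottom of this file):
#
#     def is_red(x, y):
#         return 1 if x[0] == 'Red' else 0
#
#     me = train(observations, labels, [is_red])
#     predicted = classify(me, observations[0])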
17
19 """Holds information for a Maximum Entropy classifier.
20
21 Members:
22 classes List of the possible classes of data.
23 alphas List of the weights for each feature.
24 feature_fns List of the feature functions.
25
26 """
28 self.classes = []
29 self.alphas = []
30 self.feature_fns = []
31
33 """calculate(me, observation) -> list of log probs
34
35 Calculate the log of the probability for each class. me is a
36 MaxEntropy object that has been trained. observation is a vector
37 representing the observed data. The return value is a list of
38 unnormalized log probabilities for each class.
39
40 """
41 scores = []
42 for klass in me.classes:
43 lprob = 0.0
44 for fn, alpha in map(None, me.feature_fns, me.alphas):
45 lprob += fn(observation, klass) * alpha
46 scores.append(lprob)
47 return scores
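
# The model behind calculate() is log-linear:
#
#     log P(y|x) = SUM[i] alpha_i * f_i(x, y) - log Z(x)
#
# calculate() omits the normalizing term log Z(x), so the scores are
# only comparable between classes for the same observation, which is
# all classify() below needs.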


def classify(me, observation):
    """classify(me, observation) -> class

    Classify an observation into a class.  Returns the class with the
    highest unnormalized log probability from calculate().

    """
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass
63 """_eval_feature_fn(fn, xs, classes) -> dict of values
64
65 Evaluate a feature function on every instance of the training set
66 and class. fn is a callback function that takes two parameters: a
67 training instance and a class. Return a dictionary of (training
68 set index, class index) -> non-zero value. Values of 0 are not
69 stored in the dictionary.
70
71 """
72 values = {}
73 for i in range(len(xs)):
74 for j in range(len(classes)):
75 f = fn(xs[i], classes[j])
76 if f != 0:
77 values[(i, j)] = f
78 return values
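
# For example (hypothetical values): with two training instances and
# classes ['No', 'Yes'], a feature that fires only on instance 1 with
# class 'Yes' is stored sparsely as {(1, 1): 1}.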


def _calc_empirical_expects(xs, ys, classes, features):
    """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations

    Calculate the expectation of each function from the data.  This is
    the constraint for the maximum entropy distribution.  Return a
    list of expectations, parallel to the list of features.

    """
    # Map each class to its index, then look up the class index of
    # every training outcome.
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect
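
# In symbols, this computes E_empirical[f] = (1/N) * SUM[i] f(x_i, y_i),
# the average value of a feature over the observed (x, y) pairs.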


def _calc_model_expects(xs, classes, features, alphas):
    """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.

    Calculate the expectation of each feature from the model.  This is
    not used in maximum entropy training, but provides a good function
    for debugging.

    """
    # SUM[x] P(x) * SUM[y] P(y|x) * f(x, y), with P(x) estimated as
    # 1/N from the training set.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        total = 0.0
        for (i, j), f in feature.items():
            total += p_yx[i][j] * f
        expects.append(total / len(xs))
    return expects


def _calc_p_class_given_x(xs, classes, features, alphas):
    """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix

    Calculate P(y|x), where y is the class and x is an instance from
    the training set.  Return an XS x CLASSES matrix of probabilities.

    """
    prob_yx = numpy.zeros((len(xs), len(classes)))

    # Calculate log P(y, x), the sum of the alpha-weighted features.
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Exponentiate to get P(y, x).
    prob_yx = numpy.exp(prob_yx)
    # Normalize each row so that it sums to 1, giving P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z
    return prob_yx
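
# In symbols: P(y|x) = exp(SUM[i] alpha_i * f_i(x, y)) / Z(x), where
# Z(x) sums the numerator over all classes so each row adds up to 1.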


def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
    # f#(x, y) is the total feature count for each (instance, class)
    # pair; IIS uses it in the exponent of the update equation.
    f_sharp = numpy.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp


def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx,
                     max_newton_iterations, newton_converge):
    """Solve the IIS update equation for one feature with Newton's method."""
    delta = 0.0
    iters = 0
    while iters < max_newton_iterations:
        # Evaluate the objective and its derivative at the current delta.
        f_newton = df_newton = 0.0
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        # Take one Newton step.
        ratio = f_newton / df_newton
        delta -= ratio
        if numpy.fabs(ratio) < newton_converge:  # converged
            break
        iters += 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta
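
# The equation being solved above is the IIS update condition: delta is
# chosen for each feature f so that
#
#     (1/N) * SUM[x,y] P(y|x) * f(x, y) * exp(delta * f#(x, y))
#         = E_empirical[f]
#
# where f#(x, y) is the total feature count from _calc_f_sharp.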


def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical,
               max_newton_iterations, newton_converge):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    # Pre-calculate P(y|x) under the current alphas.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx,
                                 max_newton_iterations, newton_converge)
        newalphas[i] += delta
    return newalphas


def train(training_set, results, feature_fns, update_fn=None,
          max_iis_iterations=10000, iis_converge=1.0e-5,
          max_newton_iterations=100, newton_converge=1.0e-10):
    """Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.
    """
    if not training_set:
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename variables for convenience.
    xs, ys = training_set, results

    # Get a sorted list of all the classes that need to be trained.
    classes = sorted(set(results))

    # Cache values for all features.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < max_iis_iterations:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical,
                             max_newton_iterations, newton_converge)
        diff = sum(numpy.fabs(x - y) for x, y in zip(alphas, nalphas))
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < iis_converge:  # converged
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me
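
# A sketch of the update_fn callback (show_progress is a hypothetical
# name; any callable that accepts the in-progress MaxEntropy object
# works):
#
#     def show_progress(me):
#         print(me.alphas)
#
#     me = train(xcar, ycar, user_functions, update_fn=show_progress)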


if __name__ == "__main__":
    # Small demonstration on a toy car data set.
    xcar = [
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Domestic'],
        ['Red', 'SUV', 'Imported'],
        ['Red', 'Sports', 'Imported'],
    ]

    ycar = [
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'No',
        'Yes',
    ]

    # Define some simple binary features of an observation.  Each takes
    # the observation and a candidate class, as required by train().
    def udf1(ts, cl):
        if ts[0] == 'Red':
            return 0
        else:
            return 1

    def udf2(ts, cl):
        if ts[1] == 'Sports':
            return 0
        else:
            return 1

    def udf3(ts, cl):
        if ts[2] == 'Domestic':
            return 0
        else:
            return 1

    user_functions = [udf1, udf2, udf3]

    xe = train(xcar, ycar, user_functions)
    for xv, yv in zip(xcar, ycar):
        xc = classify(xe, xv)
        print('Pred:', xv, 'gives', xc, 'y is', yv)