python 3.x - Theano function not updating parameters during gradient optimization in feed-forward neural net
I am getting my hands wet with Theano and deep nets, starting with a simple implementation of a three-layer feed-forward neural network, which I am testing on the MNIST data set.
I am using a rudimentary implementation of stochastic gradient descent to start out with, and the network is not training properly: the parameters of the network are not being updated.
I was wondering if someone could point out what I'm doing wrong.
The following code is the lstm module. I've called it that because I planned on implementing LSTM networks in the future.
import theano, theano.tensor as T
import numpy as np
from collections import OrderedDict

np_rng = np.random.RandomState(1234)


class FeedForwardLayer(object):
    def __init__(self, input_size, hidden_size, activation):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.activation = activation
        self.create_layer()

    def create_layer(self):
        # weight matrix and bias vector for this layer
        self.W = create_shared(self.hidden_size, self.input_size, "weight")
        self.b = create_shared(self.hidden_size, name="bias")

    def activate(self, x):
        # handle both a single example (vector) and a batch (matrix)
        if x.ndim > 1:
            return self.activation(T.dot(self.W, x.T) + self.b[:, None]).T
        else:
            return self.activation(T.dot(self.W, x) + self.b)

    @property
    def params(self):
        return [self.W, self.b]

    @params.setter
    def params(self, param_list):
        self.W.set_value(param_list[0])
        self.b.set_value(param_list[1])


class Network(object):
    def __init__(self, input_size, celltype=FeedForwardLayer, layer_sizes=None):
        self.input_size = input_size
        self.celltype = celltype
        self.layer_sizes = layer_sizes
        self.create_layers()

    def create_layers(self):
        self.layers = []
        input_size = self.input_size
        for layer_size in self.layer_sizes:
            self.layers.append(self.celltype(input_size, layer_size,
                                             activation=T.nnet.sigmoid))
            input_size = layer_size

    def forward(self, x):
        # propagate the input through every layer, keeping each activation
        out = []
        layer_input = x
        for layer in self.layers:
            layer_out = layer.activate(layer_input)
            out.append(layer_out)
            layer_input = layer_out
        return out

    @property
    def params(self):
        return [param for layer in self.layers for param in layer.params]

    @params.setter
    def params(self, param_list):
        start = 0
        for layer in self.layers:
            end = start + len(layer.params)
            layer.params = param_list[start:end]
            start = end


def create_shared(m, n=None, name=None):
    if n is None:
        return theano.shared(np_rng.standard_normal((m,)), name=name)
    else:
        return theano.shared(np_rng.standard_normal((m, n)), name=name)


def optimization_updates(cost, params, lr=0.01):
    """
    Implements stochastic gradient descent.

    Inputs
    ---------------
    cost   -- theano variable to minimize
    params -- network weights to take gradients with respect to
    lr     -- learning rate
    """
    lr = theano.shared(np.float64(lr).astype(theano.config.floatX))
    gparams = T.grad(cost, params)
    updates = OrderedDict()
    for gparam, param in zip(gparams, params):
        updates[param] = param - lr * gparam
    return updates
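As a quick sanity check of the update rule (my addition, not from the original post), minimizing cost = w**2 from w = 4.0 with lr = 0.1 should move w by -lr * 2w, i.e. to 3.2 after one step:

# Sanity check for optimization_updates (illustrative, not from the post).
# Reuses the theano/np imports from the module above.
w = theano.shared(np.float64(4.0), name="w")
cost = w ** 2
step = theano.function([], cost, updates=optimization_updates(cost, [w], lr=0.1))
step()
print(w.get_value())  # expect roughly 3.2 = 4.0 - 0.1 * (2 * 4.0)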
The following code creates, trains, and tests a simple three-layer feed-forward network on the MNIST data set.
from lstm import Network
import theano, theano.tensor as T
import numpy as np
import lstm as L
from sklearn.datasets import load_digits
from sklearn.cross_validation import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer

# load and normalize the dataset
digits = load_digits()
x = digits.data
y = digits.target
x -= x.min()
x /= x.max()

# create the network
model = Network(64, layer_sizes=[100, 10])

# prepare training and test data
x_train, x_test, y_train, y_test = train_test_split(x, y)
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)

# symbolic graph: forward pass, L2 cost, and SGD updates
data = T.vector()
result = model.forward(data)[-1]
label = T.vector()
cost = (result - label).norm(L=2)
updates = L.optimization_updates(cost, model.params)
update = theano.function([data, label], cost, updates=updates,
                         allow_input_downcast=True)
predict = theano.function([data], result, allow_input_downcast=True)

# train for a single pass over the training set
for x, y in zip(x_train, labels_train):
    c = update(x, y)

# evaluate on the held-out test set
predictions = []
for x in x_test:
    prediction = predict(x)
    predictions.append(np.argmax(prediction))

print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))
The problem I'm facing is that the parameters are not being updated properly. I'm not sure if that's because I'm not calculating the gradient properly, or if I'm not using the Theano function correctly.
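One direct way to check (my addition, not in the original question) is to snapshot a weight matrix before and after a single call to update and compare the two:

# Debugging sketch (my addition): verify that one SGD step actually
# changes the shared parameters. get_value() returns a copy by default,
# so `before` is a real snapshot rather than a view.
before = model.params[0].get_value()
c = update(x_train[0], labels_train[0])
after = model.params[0].get_value()
print("params changed:", not np.allclose(before, after))
print("max abs change:", np.abs(after - before).max())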
You have to make more than one pass over the dataset when using stochastic gradient descent. It is not unusual for the classification error and confusion matrix not to change during the first epoch if the dataset is small.
I made the following change in your code to train for 100 epochs:
for i in range(100):
    for x, y in zip(x_train, labels_train):
        c = update(x, y)
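As an aside (my addition, not part of the original answer), SGD usually benefits from reshuffling the training pairs each epoch so the examples are not visited in the same order every pass; a minimal sketch:

# Epoch loop with per-epoch shuffling (illustrative variant).
for epoch in range(100):
    order = np.random.permutation(len(x_train))
    for idx in order:
        c = update(x_train[idx], labels_train[idx])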
After 100 epochs, the confusion matrix seems to have started improving:
[[ 0  0 18  0 13  4  5  0  5  0]
 [ 0 42  0  2  0  0  0  0  2  0]
 [ 0  0 51  0  0  0  0  1  0  0]
 [ 0  0  0 45  0  1  0  1  2  0]
 [ 0  0  0  0 33  0  0  0  0  0]
 [ 0  0  0  0  0 47  0  0  0  0]
 [ 0  0  0  0  0  0 45  0  0  0]
 [ 0  0  0  0  1  0  0 48  0  0]
 [ 0  2  1  0  0  0  0  0 34  0]
 [ 0  1  0 25  0  3  0  2 16  0]]
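To reduce that matrix to a single number (my addition), the correct predictions sit on the diagonal, so overall accuracy is the trace divided by the total count:

# Overall accuracy from the confusion matrix (illustrative addition).
cm = confusion_matrix(y_test, predictions)
print("accuracy: %.3f" % (np.trace(cm) / float(cm.sum())))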