mlp.py: a gist created by @amueller on Mar 17, 2012; forked by @gitforhf (fork last active August 29, 2015).
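A minimal multilayer perceptron in the scikit-learn estimator style: one hidden tanh layer, a linear/softmax/tanh output layer, and mini-batch gradient descent written in plain NumPy. The code targets Python 2 (izip, xrange).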
    import numpy as np
    import warnings

    from itertools import cycle, izip

    from sklearn.utils import gen_even_slices
    from sklearn.utils import shuffle
    from sklearn.base import BaseEstimator
    from sklearn.base import ClassifierMixin
    from sklearn.preprocessing import LabelBinarizer

def _softmax(x):
    # in-place softmax: exponentiate, then normalize each row to sum to one
    np.exp(x, x)
    x /= np.sum(x, axis=1)[:, np.newaxis]
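    # Note: np.exp can overflow for large activations; subtracting the row
    # maximum first, e.g. x -= x.max(axis=1)[:, np.newaxis], leaves the
    # softmax unchanged and is the usual numerically stable variant.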

def _tanh(x):
    # in-place tanh
    np.tanh(x, x)

def _dtanh(x):
    """Derivative of tanh as a function of tanh: computes 1 - x**2 in place."""
    x *= -x
    x += 1
    return x

class BaseMLP(BaseEstimator):
    """Base class for estimators based on multi-layer perceptrons."""

    def __init__(self, n_hidden, lr, l2decay, loss, output_layer, batch_size,
                 verbose=0):
        self.n_hidden = n_hidden
        self.lr = lr
        self.l2decay = l2decay  # stored but never applied in this gist
        self.loss = loss
        self.batch_size = batch_size
        self.verbose = verbose

        # check compatibility of loss and output layer:
        if output_layer == 'softmax' and loss != 'cross_entropy':
            raise ValueError('Softmax output is only supported '
                             'with cross entropy loss function.')
        if output_layer != 'softmax' and loss == 'cross_entropy':
            raise ValueError('Cross-entropy loss is only '
                             'supported with softmax output layer.')

        # set output layer and loss function. The builtin id doubles as a
        # no-op here: output_func is called only for its in-place side
        # effect, so a linear output layer leaves x_output untouched.
        if output_layer == 'linear':
            self.output_func = id
        elif output_layer == 'softmax':
            self.output_func = _softmax
        elif output_layer == 'tanh':
            self.output_func = _tanh
        else:
            raise ValueError("'output_layer' must be one of "
                             "'linear', 'softmax' or 'tanh'.")

        if loss not in ['cross_entropy', 'square', 'crammer_singer']:
            raise ValueError("'loss' must be one of "
                             "'cross_entropy', 'square' or 'crammer_singer'.")

    def fit(self, X, y, max_epochs, shuffle_data, verbose=0):
        # get all sizes
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError("Shapes of X and y don't fit.")
        self.n_outs = y.shape[1]
        # integer division: any incomplete trailing batch is dropped
        n_batches = n_samples // self.batch_size
        if n_samples % self.batch_size != 0:
            warnings.warn("Discarding some samples: "
                          "sample size not divisible by batch size.")
        n_iterations = int(max_epochs * n_batches)

        if shuffle_data:
            X, y = shuffle(X, y)

        # generate batch slices
        batch_slices = list(gen_even_slices(n_batches * self.batch_size,
                                            n_batches))

        # generate weights.
        # TODO: smart initialization
        self.weights1_ = (np.random.uniform(size=(n_features, self.n_hidden))
                          / np.sqrt(n_features))
        self.bias1_ = np.zeros(self.n_hidden)
        self.weights2_ = (np.random.uniform(size=(self.n_hidden, self.n_outs))
                          / np.sqrt(self.n_hidden))
        self.bias2_ = np.zeros(self.n_outs)

        # preallocate memory for the activations and deltas of one batch
        x_hidden = np.empty((self.batch_size, self.n_hidden))
        delta_h = np.empty((self.batch_size, self.n_hidden))
        x_output = np.empty((self.batch_size, self.n_outs))
        delta_o = np.empty((self.batch_size, self.n_outs))

        # main loop: cycle through the batches for n_iterations SGD steps
        for i, batch_slice in izip(xrange(n_iterations), cycle(batch_slices)):
            self._forward(i, X, batch_slice, x_hidden, x_output)
            self._backward(i, X, y, batch_slice, x_hidden, x_output,
                           delta_o, delta_h)
        return self

    def predict(self, X):
        # one forward pass over all samples; returns the raw network
        # outputs (MLPClassifier.predict maps these back to class labels)
        n_samples = X.shape[0]
        x_hidden = np.empty((n_samples, self.n_hidden))
        x_output = np.empty((n_samples, self.n_outs))
        self._forward(None, X, slice(0, n_samples), x_hidden, x_output)
        return x_output

    def _forward(self, i, X, batch_slice, x_hidden, x_output):
        """Do a forward pass through the network."""
        x_hidden[:] = np.dot(X[batch_slice], self.weights1_)
        x_hidden += self.bias1_
        np.tanh(x_hidden, x_hidden)
        x_output[:] = np.dot(x_hidden, self.weights2_)
        x_output += self.bias2_

        # apply output nonlinearity (if any)
        self.output_func(x_output)
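    # Note: in matrix form, the forward pass above computes
    #     x_hidden = tanh(X_b . weights1_ + bias1_)
    #     x_output = f(x_hidden . weights2_ + bias2_)
    # for a batch X_b, with f the identity, softmax or tanh, as selected
    # by output_layer.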

    def _backward(self, i, X, y, batch_slice, x_hidden, x_output, delta_o,
                  delta_h):
        """Do a backward pass through the network and update the weights."""

        # calculate derivative of output layer
        if self.loss in ['cross_entropy'] or (self.loss == 'square'
                                              and self.output_func == id):
            delta_o[:] = y[batch_slice] - x_output
        elif self.loss == 'crammer_singer':
            raise ValueError("Not implemented yet.")
            # unreachable sketch of the update:
            # delta_o[:] = 0
            # delta_o[y[batch_slice], np.ogrid[len(batch_slice)]] -= 1
            # delta_o[np.argmax(x_output - np.ones((1))[y[batch_slice],
            #         np.ogrid[len(batch_slice)]], axis=1),
            #         np.ogrid[len(batch_slice)]] += 1
        elif self.loss == 'square' and self.output_func == _tanh:
            delta_o[:] = (y[batch_slice] - x_output) * _dtanh(x_output)
        else:
            raise ValueError("Unknown combination of output function"
                             " and error.")

        if self.verbose > 0:
            print(np.linalg.norm(delta_o / self.batch_size))
        # backpropagate to the hidden layer; the chain rule multiplies by
        # the tanh derivative 1 - x_hidden**2 of the hidden activations
        delta_h[:] = np.dot(delta_o, self.weights2_.T)
        delta_h *= 1 - x_hidden ** 2

        # update weights
        self.weights2_ += self.lr / self.batch_size * np.dot(x_hidden.T,
                                                             delta_o)
        self.bias2_ += self.lr * np.mean(delta_o, axis=0)
        self.weights1_ += self.lr / self.batch_size * np.dot(X[batch_slice].T,
                                                             delta_h)
        self.bias1_ += self.lr * np.mean(delta_h, axis=0)
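    # Written out, the updates above are plain mini-batch SGD:
    #     weights2_ += lr/B * x_hidden^T . delta_o    bias2_ += lr * mean(delta_o)
    #     weights1_ += lr/B * X_b^T . delta_h         bias1_ += lr * mean(delta_h)
    # where B is the batch size and delta_o = y - output is the negative
    # output-layer gradient for the supported loss/output pairings.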


class MLPClassifier(BaseMLP, ClassifierMixin):
    """Multilayer perceptron classifier.

    Uses a neural network with one hidden tanh layer, trained by
    mini-batch stochastic gradient descent.

    Parameters
    ----------
    n_hidden : int, default 200
        Number of units in the hidden layer.
    lr : float, default 0.1
        Learning rate.
    l2decay : float, default 0
        L2 weight decay (currently unused).
    loss : str, default 'cross_entropy'
        Loss function, one of 'cross_entropy', 'square' or 'crammer_singer'.
    output_layer : str, default 'softmax'
        Output nonlinearity, one of 'linear', 'softmax' or 'tanh'.
    batch_size : int, default 100
        Size of the mini-batches; samples that do not fill a complete
        batch are discarded.
    verbose : int, default 0
        Verbosity level.
    """

    def __init__(self, n_hidden=200, lr=0.1, l2decay=0, loss='cross_entropy',
                 output_layer='softmax', batch_size=100, verbose=0):
        super(MLPClassifier, self).__init__(n_hidden, lr, l2decay, loss,
                                            output_layer, batch_size, verbose)

    def fit(self, X, y, max_epochs=10, shuffle_data=False):
        # one-hot encode the labels for training
        self.lb = LabelBinarizer()
        one_hot_labels = self.lb.fit_transform(y)
        super(MLPClassifier, self).fit(X, one_hot_labels, max_epochs,
                                       shuffle_data)
        return self

    def predict(self, X):
        prediction = super(MLPClassifier, self).predict(X)
        # map the continuous network outputs back to class labels
        return self.lb.inverse_transform(prediction)


def test_classification():
    # smoke test: the network should (over)fit the digits training set
    from sklearn.datasets import load_digits
    digits = load_digits()
    X, y = digits.data, digits.target
    mlp = MLPClassifier()
    mlp.fit(X, y)
    training_score = mlp.score(X, y)
    print("training accuracy: %f" % training_score)
    assert training_score > .95
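

def test_classification_holdout():
    # Usage sketch, not part of the original gist: held-out accuracy on the
    # digits data rather than training accuracy. Assumes train_test_split is
    # importable; in scikit-learn >= 0.18 it lives in sklearn.model_selection
    # instead of sklearn.cross_validation.
    from sklearn.datasets import load_digits
    from sklearn.cross_validation import train_test_split
    digits = load_digits()
    X_train, X_test, y_train, y_test = train_test_split(
        digits.data, digits.target, test_size=0.25, random_state=0)
    mlp = MLPClassifier(n_hidden=200, lr=0.1, batch_size=100)
    mlp.fit(X_train, y_train, max_epochs=10, shuffle_data=True)
    print("test accuracy: %f" % mlp.score(X_test, y_test))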


if __name__ == "__main__":
    test_classification()