From f6613b72f5283332745641b3f97dd3f65d9b2328 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 14:36:33 +0100 Subject: [PATCH 01/17] Working version of a feed forward network loaded from pre-trained network with PyLearn2. --- .gitignore | 1 - sknn/backend/lasagne/__init__.py | 9 + sknn/backend/lasagne/mlp.py | 283 +++++++++++++++++++++++++++++++ 3 files changed, 292 insertions(+), 1 deletion(-) create mode 100644 sknn/backend/lasagne/__init__.py create mode 100644 sknn/backend/lasagne/mlp.py diff --git a/.gitignore b/.gitignore index aa521bd..0a08d74 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,6 @@ var/ *.egg # Machine Learning -Lasagne/ nolearn/ scikit-learn/ diff --git a/sknn/backend/lasagne/__init__.py b/sknn/backend/lasagne/__init__.py new file mode 100644 index 0000000..e3beaf1 --- /dev/null +++ b/sknn/backend/lasagne/__init__.py @@ -0,0 +1,9 @@ +# -*- coding: utf-8 -*- +from __future__ import (absolute_import, unicode_literals, print_function) + +from ... import backend +from .mlp import MultiLayerPerceptronBackend + +# Register this implementation as the MLP backend. +backend.MultiLayerPerceptronBackend = MultiLayerPerceptronBackend +backend.name = 'lasagne' \ No newline at end of file diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py new file mode 100644 index 0000000..5cf810c --- /dev/null +++ b/sknn/backend/lasagne/mlp.py @@ -0,0 +1,283 @@ +# -*- coding: utf-8 -*- +from __future__ import (absolute_import, unicode_literals, print_function) + +__all__ = ['Regressor', 'Classifier', 'Layer', 'Convolution'] + +import os +import sys +import math +import time +import logging +import itertools + +log = logging.getLogger('sknn') + + +import numpy +import theano +import sklearn.base +import sklearn.pipeline +import sklearn.preprocessing +import sklearn.cross_validation + +import theano.tensor as T +import lasagne.layers +import lasagne.nonlinearities as nl + +from ..base import BaseBackend +from ...nn import Layer, Convolution, ansi + + +class MultiLayerPerceptronBackend(BaseBackend): + """ + Abstract base class for wrapping the multi-layer perceptron functionality + from Lasagne. + """ + + def __init__(self, spec): + super(MultiLayerPerceptronBackend, self).__init__(spec) + self.mlp = None + self.ds = None + self.vs = None + self.f = None + self.trainer = None + self.cost = None + + def _create_mlp_trainer(self, params): + # Aggregate all the dropout parameters into shared dictionaries. + dropout_probs, dropout_scales = {}, {} + for l in [l for l in self.layers if l.dropout is not None]: + incl = 1.0 - l.dropout + dropout_probs[l.name] = incl + dropout_scales[l.name] = 1.0 / incl + assert len(dropout_probs) == 0 or self.regularize in ('dropout', None) + + if self.regularize == 'dropout' or len(dropout_probs) > 0: + # Use the globally specified dropout rate when there are no layer-specific ones. + incl = 1.0 - (self.dropout_rate or 0.5) + default_prob, default_scale = incl, 1.0 / incl + + if self.regularize is None: + self.regularize = 'dropout' + + log.warning('Dropout not yet fully implemented.') + + """ + # Pass all the parameters to pylearn2 as a custom cost function. + self.cost = dropout.Dropout( + default_input_include_prob=default_prob, + default_input_scale=default_scale, + input_include_probs=dropout_probs, input_scales=dropout_scales) + """ + + # Aggregate all regularization parameters into common dictionaries. 
+ layer_decay = {} + if self.regularize in ('L1', 'L2') or any(l.weight_decay for l in self.layers): + wd = self.weight_decay or 0.0001 + for l in self.layers: + layer_decay[l.name] = l.weight_decay or wd + assert len(layer_decay) == 0 or self.regularize in ('L1', 'L2', None) + + if len(layer_decay) > 0: + mlp_default_cost = self.mlp.get_default_cost() + if self.regularize == 'L1': + raise NotImplementedError + """ + l1 = mlp_cost.L1WeightDecay(layer_decay) + self.cost = cost.SumOfCosts([mlp_default_cost,l1]) + """ + else: # Default is 'L2'. + raise NotImplementedError + """ + if self.regularize is None: + self.regularize = 'L2' + + l2 = mlp_cost.WeightDecay(layer_decay) + self.cost = cost.SumOfCosts([mlp_default_cost,l2]) + """ + + return self._create_trainer(params, self.cost) + + def _create_trainer(self, params, cost): + if self.learning_rule != 'sgd': + raise NotImplementedError( + "Learning rule type `%s` is not supported." % self.learning_rule) + + return lasagne.updates.sgd(cost, params, + learning_rate=self.learning_rate) + + def _create_convolution_layer(self, name, layer, network): + self._check_layer(layer, + required=['channels', 'kernel_shape'], + optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type']) + + nonlinearities = {'Rectifier': nl.rectify, + 'Signmoid': nl.sigmoid, + 'Tanh': nl.tanh, + 'Softmax': nl.softmax, + 'Linear': nl.linear} + assert layer.type in nonlinearities,\ + "Convolution layer type `%s` is not supported." % layer.type + + network = lasagne.layers.Conv2DLayer( + network, + num_filters=layer.channels, + filter_size=layer.kernel_shape, + nonlinearity=nonlinearities[layer.type]) + + if layer.pool_shape != (1, 1): + network = lasagne.layers.Pool2DLayer( + network, + pool_size=layer.pool_shape, + stride=layer.pool_stride, + mode=border_mode) + + return network + + def _create_layer(self, name, layer, network): + if isinstance(layer, Convolution): + return self._create_convolution_layer(name, layer, irange) + + nonlinearities = {'Rectifier': nl.rectify, + 'Signmoid': nl.sigmoid, + 'Tanh': nl.tanh, + 'Softmax': nl.softmax, + 'Linear': nl.linear} + + if layer.dropout: + network = lasagne.layers.dropout(network, 0.5) + + return lasagne.layers.DenseLayer(network, + num_units=layer.units, + nonlinearity=nonlinearities[layer.type]) + + def _create_mlp(self, X): + self.tensor_input = T.matrix('X') + self.tensor_output = T.vector('y') + network = lasagne.layers.InputLayer((None, X.shape[1]), self.tensor_input) + + # Create the layers one by one, connecting to previous. + self.mlp = [] + for i, layer in enumerate(self.layers): + + """ + TODO: Refactor this into common wrapper code. + + fan_in = self.unit_counts[i] + fan_out = self.unit_counts[i + 1] + + lim = numpy.sqrt(6) / numpy.sqrt(fan_in + fan_out) + if layer.type == 'Tanh': + lim *= 1.1 * lim + elif layer.type in ('Rectifier', 'Maxout'): + # He, Rang, Zhen and Sun, converted to uniform. + lim *= numpy.sqrt(2.0) + elif layer.type == 'Sigmoid': + lim *= 4.0 + """ + + # TODO: self.random_state + network = self._create_layer(layer.name, layer, network) + self.mlp.append(network) + + log.info( + "Initializing neural network with %i layers, %i inputs and %i outputs.", + len(self.layers), self.unit_counts[0], self.layers[-1].units) + + """ + TODO: Display the network's layers for information. 
+ + for l, p, count in zip(self.layers, self.mlp.layers, self.unit_counts[1:]): + space = p.get_output_space() + if isinstance(l, Convolution): + log.debug(" - Convl: {}{: <10}{} Output: {}{: <10}{} Channels: {}{}{}".format( + ansi.BOLD, l.type, ansi.ENDC, + ansi.BOLD, repr(space.shape), ansi.ENDC, + ansi.BOLD, space.num_channels, ansi.ENDC)) + + # NOTE: Numbers don't match up exactly for pooling; one off. The logic is convoluted! + # assert count == numpy.product(space.shape) * space.num_channels,\ + # "Mismatch in the calculated number of convolution layer outputs." + else: + log.debug(" - Dense: {}{: <10}{} Units: {}{: <4}{}".format( + ansi.BOLD, l.type, ansi.ENDC, ansi.BOLD, l.units, ansi.ENDC)) + assert count == space.get_total_dimension(),\ + "Mismatch in the calculated number of dense layer outputs." + """ + + if self.weights is not None: + l = min(len(self.weights), len(self.mlp)) + log.info("Reloading parameters for %i layer weights and biases." % (l,)) + self._array_to_mlp(self.weights, self.mlp) + self.weights = None + + log.debug("") + + output = lasagne.layers.get_output(network, deterministic=True) + self.f = theano.function([self.tensor_input], output) # allow_input_downcast=True + + def _initialize_impl(self, X, y=None): + if self.mlp is None: + self._create_mlp(X) + + # Can do partial initialization when predicting, no trainer needed. + if y is None: + return + + if self.valid_size > 0.0: + assert self.valid_set is None, "Can't specify valid_size and valid_set together." + X, X_v, y, y_v = sklearn.cross_validation.train_test_split( + X, y, + test_size=self.valid_size, + random_state=self.random_state) + self.valid_set = X_v, y_v + + """ + self.ds = self._create_dataset(self.input_space, X, y) + if self.valid_set is not None: + X_v, y_v = self.valid_set + input_space = self._create_input_space(X_v) + self.vs = self._create_dataset(input_space, X_v, y_v) + else: + self.vs = None + """ + + params = lasagne.layers.get_all_params(self.mlp, trainable=True) + self.trainer = self._create_mlp_trainer(params) + self.trainer.setup(self.mlp, self.ds) + return X, y + + def _predict_impl(self, X): + if not self.is_initialized: + self._initialize_impl(X) + return self.f(X) + + def _train_impl(self, X, y): + if self.is_convolution: + X = self.ds.view_converter.topo_view_to_design_mat(X) + self.ds.X, self.ds.y = X, y + + self._train_layer(self.trainer, self.mlp, self.ds) + + @property + def is_initialized(self): + """Check if the neural network was setup already. + """ + return not (self.ds is None or self.f is None) + + def _mlp_to_array(self): + result = [(l.W.value, l.b.value) for l in self.mlp.layers] + print(result) + return result + + def _array_to_mlp(self, array, nn): + for layer, (weights, biases) in zip(nn, array): + ws = tuple(layer.W.shape.eval()) + assert ws == weights.shape, "Layer weights shape mismatch: %r != %r" %\ + (ws, weights.shape) + layer.W.set_value(weights) + + bs = tuple(layer.b.shape.eval()) + assert bs == biases.shape, "Layer biases shape mismatch: %r != %r" %\ + (bs, biases.shape) + layer.b.set_value(biases) From 0cbc07394eabc08329a126e69da66eb6c3543cf8 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 16:08:50 +0100 Subject: [PATCH 02/17] Moved some pylearn2 specific training code into that backend. 
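For orientation while reading this series: the Lasagne backend added in the previous patch registers itself on import (see sknn/backend/lasagne/__init__.py in PATCH 01), so selecting it is just an import. A minimal usage sketch, assuming the package layout stays as in this series; the assert is only an illustrative check:

    # Importing the subpackage runs its __init__.py, which assigns
    # backend.MultiLayerPerceptronBackend and sets backend.name = 'lasagne'.
    from sknn.backend import lasagne
    import sknn.backend

    assert sknn.backend.name == 'lasagne'  # illustrative check only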
--- sknn/backend/pylearn2/nn.py | 39 +++++++++++++++++++++++++++++++++++++ sknn/nn.py | 38 ------------------------------------ 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/sknn/backend/pylearn2/nn.py b/sknn/backend/pylearn2/nn.py index 2c97e02..01210c0 100644 --- a/sknn/backend/pylearn2/nn.py +++ b/sknn/backend/pylearn2/nn.py @@ -16,6 +16,7 @@ from .pywrap2 import learning_rule as lr, termination_criteria as tc from .dataset import DenseDesignMatrix, SparseDesignMatrix, FastVectorSpace +from ...nn import ansi from ..base import BaseBackend @@ -75,3 +76,41 @@ def _create_trainer(self, dataset, cost): learning_rate=self.learning_rate, termination_criterion=termination_criterion, monitoring_dataset=dataset) + + def _train_layer(self, trainer, layer, dataset): + # Bug in PyLearn2 that has some unicode channels, can't sort. + layer.monitor.channels = {str(k): v for k, v in layer.monitor.channels.items()} + best_valid_error = float("inf") + + for i in itertools.count(1): + start = time.time() + trainer.train(dataset=dataset) + + layer.monitor.report_epoch() + layer.monitor() + + objective = layer.monitor.channels.get('objective', None) + if objective: + avg_valid_error = objective.val_shared.get_value() + best_valid_error = min(best_valid_error, avg_valid_error) + else: + # 'objective' channel is only defined with validation set. + avg_valid_error = None + + best_valid = bool(best_valid_error == avg_valid_error) + log.debug("{:>5} {}{}{} {:>5.1f}s".format( + i, + ansi.GREEN if best_valid else "", + "{:>10.6f}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ", + ansi.ENDC if best_valid else "", + time.time() - start + )) + + if not trainer.continue_learning(layer): + log.debug("") + log.info("Early termination condition fired at %i iterations.", i) + break + if self.n_iter is not None and i >= self.n_iter: + log.debug("") + log.info("Terminating after specified %i total iterations.", i) + break diff --git a/sknn/nn.py b/sknn/nn.py index 0376b6d..eaaa575 100644 --- a/sknn/nn.py +++ b/sknn/nn.py @@ -443,41 +443,3 @@ def _create_logger(self): hnd.setLevel(lvl) log.addHandler(hnd) log.setLevel(lvl) - - def _train_layer(self, trainer, layer, dataset): - # Bug in PyLearn2 that has some unicode channels, can't sort. - layer.monitor.channels = {str(k): v for k, v in layer.monitor.channels.items()} - best_valid_error = float("inf") - - for i in itertools.count(1): - start = time.time() - trainer.train(dataset=dataset) - - layer.monitor.report_epoch() - layer.monitor() - - objective = layer.monitor.channels.get('objective', None) - if objective: - avg_valid_error = objective.val_shared.get_value() - best_valid_error = min(best_valid_error, avg_valid_error) - else: - # 'objective' channel is only defined with validation set. - avg_valid_error = None - - best_valid = bool(best_valid_error == avg_valid_error) - log.debug("{:>5} {}{}{} {:>5.1f}s".format( - i, - ansi.GREEN if best_valid else "", - "{:>10.6f}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ", - ansi.ENDC if best_valid else "", - time.time() - start - )) - - if not trainer.continue_learning(layer): - log.debug("") - log.info("Early termination condition fired at %i iterations.", i) - break - if self.n_iter is not None and i >= self.n_iter: - log.debug("") - log.info("Terminating after specified %i total iterations.", i) - break From 384fe771bfbf60c86c7edfea0dc0d8a4dee1de35 Mon Sep 17 00:00:00 2001 From: "Alex J. 
Champandard" Date: Sat, 14 Nov 2015 16:10:05 +0100 Subject: [PATCH 03/17] Training of neural networks in Lasagne works, though the batch iteration seems quite slow but much better. --- sknn/backend/lasagne/mlp.py | 101 +++++++++++++++++++++++------------- 1 file changed, 66 insertions(+), 35 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 5cf810c..71cd398 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -37,8 +37,6 @@ class MultiLayerPerceptronBackend(BaseBackend): def __init__(self, spec): super(MultiLayerPerceptronBackend, self).__init__(spec) self.mlp = None - self.ds = None - self.vs = None self.f = None self.trainer = None self.cost = None @@ -96,34 +94,43 @@ def _create_mlp_trainer(self, params): self.cost = cost.SumOfCosts([mlp_default_cost,l2]) """ + self.cost = lasagne.objectives.squared_error(self.symbol_output, self.tensor_output).mean() return self._create_trainer(params, self.cost) def _create_trainer(self, params, cost): - if self.learning_rule != 'sgd': + if self.learning_rule in ('sgd', 'adagrad', 'adadelta', 'rmsprop', 'adam'): + lr = getattr(lasagne.updates, self.learning_rule) + self._learning_rule = lr(cost, params, learning_rate=self.learning_rate) + elif self.learning_rule in ('momentum', 'nesterov'): + lr = getattr(lasagne.updates, self.learning_rule) + self._learning_rule = lr(cost, params, learning_rate=self.learning_rate, momentum=self.learning_momentum) + else: raise NotImplementedError( "Learning rule type `%s` is not supported." % self.learning_rule) - return lasagne.updates.sgd(cost, params, - learning_rate=self.learning_rate) - - def _create_convolution_layer(self, name, layer, network): - self._check_layer(layer, - required=['channels', 'kernel_shape'], - optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type']) + return theano.function([self.tensor_input, self.tensor_output], cost, updates=self._learning_rule) + def _get_activation(self, l): nonlinearities = {'Rectifier': nl.rectify, - 'Signmoid': nl.sigmoid, + 'Sigmoid': nl.sigmoid, 'Tanh': nl.tanh, 'Softmax': nl.softmax, 'Linear': nl.linear} - assert layer.type in nonlinearities,\ - "Convolution layer type `%s` is not supported." % layer.type + + assert l.type in nonlinearities,\ + "Layer type `%s` is not supported for `%s`." 
% (layer.type, layer.name) + return nonlinearities[l.type] + + def _create_convolution_layer(self, name, layer, network): + self._check_layer(layer, + required=['channels', 'kernel_shape'], + optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type']) network = lasagne.layers.Conv2DLayer( network, num_filters=layer.channels, filter_size=layer.kernel_shape, - nonlinearity=nonlinearities[layer.type]) + nonlinearity=self._get_activation(layer)) if layer.pool_shape != (1, 1): network = lasagne.layers.Pool2DLayer( @@ -138,22 +145,16 @@ def _create_layer(self, name, layer, network): if isinstance(layer, Convolution): return self._create_convolution_layer(name, layer, irange) - nonlinearities = {'Rectifier': nl.rectify, - 'Signmoid': nl.sigmoid, - 'Tanh': nl.tanh, - 'Softmax': nl.softmax, - 'Linear': nl.linear} - if layer.dropout: network = lasagne.layers.dropout(network, 0.5) return lasagne.layers.DenseLayer(network, num_units=layer.units, - nonlinearity=nonlinearities[layer.type]) + nonlinearity=self._get_activation(layer)) def _create_mlp(self, X): self.tensor_input = T.matrix('X') - self.tensor_output = T.vector('y') + self.tensor_output = T.matrix('y') network = lasagne.layers.InputLayer((None, X.shape[1]), self.tensor_input) # Create the layers one by one, connecting to previous. @@ -213,8 +214,8 @@ def _create_mlp(self, X): log.debug("") - output = lasagne.layers.get_output(network, deterministic=True) - self.f = theano.function([self.tensor_input], output) # allow_input_downcast=True + self.symbol_output = lasagne.layers.get_output(network, deterministic=True) + self.f = theano.function([self.tensor_input], self.symbol_output) # allow_input_downcast=True def _initialize_impl(self, X, y=None): if self.mlp is None: @@ -242,33 +243,63 @@ def _initialize_impl(self, X, y=None): self.vs = None """ - params = lasagne.layers.get_all_params(self.mlp, trainable=True) + params = lasagne.layers.get_all_params(self.mlp[-1], trainable=True) self.trainer = self._create_mlp_trainer(params) - self.trainer.setup(self.mlp, self.ds) return X, y def _predict_impl(self, X): if not self.is_initialized: self._initialize_impl(X) return self.f(X) + + def _iterate_data(self, X, y, batch_size): + indices = numpy.arange(len(X)) + numpy.random.shuffle(indices) + for start_idx in range(0, len(X) - batch_size + 1, batch_size): + excerpt = indices[start_idx:start_idx + batch_size] + yield X[excerpt], y[excerpt] def _train_impl(self, X, y): - if self.is_convolution: - X = self.ds.view_converter.topo_view_to_design_mat(X) - self.ds.X, self.ds.y = X, y - - self._train_layer(self.trainer, self.mlp, self.ds) + best_valid_error = float("inf") + + for i in itertools.count(1): + start = time.time() + + loss, batches = 0.0, 0 + for Xb, yb in self._iterate_data(X, y, self.batch_size): + loss += self.trainer(X, y) + batches += 1 + print('.', end='', flush=True) + + avg_valid_error = loss / batches + best_valid_error = min(best_valid_error, avg_valid_error) + + best_valid = bool(best_valid_error == avg_valid_error) + log.debug("\r{:>5} {}{}{} {:>5.1f}s".format( + i, + ansi.GREEN if best_valid else "", + "{:>10.6f}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ", + ansi.ENDC if best_valid else "", + time.time() - start + )) + + if False: # TODO: Monitor n_stable + log.debug("") + log.info("Early termination condition fired at %i iterations.", i) + break + if self.n_iter is not None and i >= self.n_iter: + log.debug("") + log.info("Terminating after specified %i total iterations.", i) + break 
@property def is_initialized(self): """Check if the neural network was setup already. """ - return not (self.ds is None or self.f is None) + return not (self.f is None) def _mlp_to_array(self): - result = [(l.W.value, l.b.value) for l in self.mlp.layers] - print(result) - return result + return [(l.W.get_value(), l.b.get_value()) for l in self.mlp] def _array_to_mlp(self, array, nn): for layer, (weights, biases) in zip(nn, array): From 8a4c9e687351093d0df9add6250e6b693d5b5b75 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 17:18:13 +0100 Subject: [PATCH 04/17] Removed unused code, implemented logging for layer details, fix for MNIST benchmark using categorical cross entropy. --- sknn/backend/lasagne/mlp.py | 70 ++++++------------------------------- 1 file changed, 10 insertions(+), 60 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 71cd398..6232510 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -42,32 +42,6 @@ def __init__(self, spec): self.cost = None def _create_mlp_trainer(self, params): - # Aggregate all the dropout parameters into shared dictionaries. - dropout_probs, dropout_scales = {}, {} - for l in [l for l in self.layers if l.dropout is not None]: - incl = 1.0 - l.dropout - dropout_probs[l.name] = incl - dropout_scales[l.name] = 1.0 / incl - assert len(dropout_probs) == 0 or self.regularize in ('dropout', None) - - if self.regularize == 'dropout' or len(dropout_probs) > 0: - # Use the globally specified dropout rate when there are no layer-specific ones. - incl = 1.0 - (self.dropout_rate or 0.5) - default_prob, default_scale = incl, 1.0 / incl - - if self.regularize is None: - self.regularize = 'dropout' - - log.warning('Dropout not yet fully implemented.') - - """ - # Pass all the parameters to pylearn2 as a custom cost function. - self.cost = dropout.Dropout( - default_input_include_prob=default_prob, - default_input_scale=default_scale, - input_include_probs=dropout_probs, input_scales=dropout_scales) - """ - # Aggregate all regularization parameters into common dictionaries. 
layer_decay = {} if self.regularize in ('L1', 'L2') or any(l.weight_decay for l in self.layers): @@ -94,7 +68,7 @@ def _create_mlp_trainer(self, params): self.cost = cost.SumOfCosts([mlp_default_cost,l2]) """ - self.cost = lasagne.objectives.squared_error(self.symbol_output, self.tensor_output).mean() + self.cost = lasagne.objectives.categorical_crossentropy(self.symbol_output, self.tensor_output).mean() return self._create_trainer(params, self.cost) def _create_trainer(self, params, cost): @@ -102,6 +76,7 @@ def _create_trainer(self, params, cost): lr = getattr(lasagne.updates, self.learning_rule) self._learning_rule = lr(cost, params, learning_rate=self.learning_rate) elif self.learning_rule in ('momentum', 'nesterov'): + lasagne.updates.nesterov = lasagne.updates.nesterov_momentum lr = getattr(lasagne.updates, self.learning_rule) self._learning_rule = lr(cost, params, learning_rate=self.learning_rate, momentum=self.learning_momentum) else: @@ -145,8 +120,9 @@ def _create_layer(self, name, layer, network): if isinstance(layer, Convolution): return self._create_convolution_layer(name, layer, irange) - if layer.dropout: - network = lasagne.layers.dropout(network, 0.5) + dropout = layer.dropout or self.dropout_rate + if dropout is not None: + network = lasagne.layers.dropout(network, dropout) return lasagne.layers.DenseLayer(network, num_units=layer.units, @@ -162,22 +138,10 @@ def _create_mlp(self, X): for i, layer in enumerate(self.layers): """ - TODO: Refactor this into common wrapper code. - - fan_in = self.unit_counts[i] - fan_out = self.unit_counts[i + 1] - - lim = numpy.sqrt(6) / numpy.sqrt(fan_in + fan_out) - if layer.type == 'Tanh': - lim *= 1.1 * lim - elif layer.type in ('Rectifier', 'Maxout'): - # He, Rang, Zhen and Sun, converted to uniform. - lim *= numpy.sqrt(2.0) - elif layer.type == 'Sigmoid': - lim *= 4.0 + TODO: Expose weight initialization policy. + TODO: self.random_state """ - # TODO: self.random_state network = self._create_layer(layer.name, layer, network) self.mlp.append(network) @@ -185,11 +149,8 @@ def _create_mlp(self, X): "Initializing neural network with %i layers, %i inputs and %i outputs.", len(self.layers), self.unit_counts[0], self.layers[-1].units) - """ - TODO: Display the network's layers for information. - - for l, p, count in zip(self.layers, self.mlp.layers, self.unit_counts[1:]): - space = p.get_output_space() + for l, p, count in zip(self.layers, self.mlp, self.unit_counts[1:]): + space = p.output_shape if isinstance(l, Convolution): log.debug(" - Convl: {}{: <10}{} Output: {}{: <10}{} Channels: {}{}{}".format( ansi.BOLD, l.type, ansi.ENDC, @@ -202,9 +163,8 @@ def _create_mlp(self, X): else: log.debug(" - Dense: {}{: <10}{} Units: {}{: <4}{}".format( ansi.BOLD, l.type, ansi.ENDC, ansi.BOLD, l.units, ansi.ENDC)) - assert count == space.get_total_dimension(),\ + assert count == space[1],\ "Mismatch in the calculated number of dense layer outputs." 
- """ if self.weights is not None: l = min(len(self.weights), len(self.mlp)) @@ -233,16 +193,6 @@ def _initialize_impl(self, X, y=None): random_state=self.random_state) self.valid_set = X_v, y_v - """ - self.ds = self._create_dataset(self.input_space, X, y) - if self.valid_set is not None: - X_v, y_v = self.valid_set - input_space = self._create_input_space(X_v) - self.vs = self._create_dataset(input_space, X_v, y_v) - else: - self.vs = None - """ - params = lasagne.layers.get_all_params(self.mlp[-1], trainable=True) self.trainer = self._create_mlp_trainer(params) return X, y From 0f51f4e110f751ee788809102ec5c92d7497377e Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 19:43:25 +0100 Subject: [PATCH 05/17] Support for convolution in Lasagne. Extremely slow on CPU/Intel, actually functional? --- sknn/backend/__init__.py | 4 ++-- sknn/backend/lasagne/mlp.py | 24 ++++++++++++++++-------- sknn/nn.py | 11 ++++++----- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/sknn/backend/__init__.py b/sknn/backend/__init__.py index f574601..1a84f79 100644 --- a/sknn/backend/__init__.py +++ b/sknn/backend/__init__.py @@ -21,5 +21,5 @@ def __init__(self, _): # Automatically import the recommended backend if none was manually imported. def setup(): if name == None: - from . import pylearn2 - assert name is not None + from . import lasagne + assert name is not None, "No backend for module sknn was imported." diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 6232510..ccbff65 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -101,24 +101,26 @@ def _create_convolution_layer(self, name, layer, network): required=['channels', 'kernel_shape'], optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type']) + print(layer.kernel_shape, layer.kernel_stride) network = lasagne.layers.Conv2DLayer( network, num_filters=layer.channels, filter_size=layer.kernel_shape, + stride=layer.kernel_stride, + pad=layer.border_mode, nonlinearity=self._get_activation(layer)) if layer.pool_shape != (1, 1): network = lasagne.layers.Pool2DLayer( network, pool_size=layer.pool_shape, - stride=layer.pool_stride, - mode=border_mode) + stride=layer.pool_shape) return network def _create_layer(self, name, layer, network): if isinstance(layer, Convolution): - return self._create_convolution_layer(name, layer, irange) + return self._create_convolution_layer(name, layer, network) dropout = layer.dropout or self.dropout_rate if dropout is not None: @@ -129,9 +131,11 @@ def _create_layer(self, name, layer, network): nonlinearity=self._get_activation(layer)) def _create_mlp(self, X): - self.tensor_input = T.matrix('X') + self.tensor_input = T.tensor4('X') self.tensor_output = T.matrix('y') - network = lasagne.layers.InputLayer((None, X.shape[1]), self.tensor_input) + + shape = list(X.shape) + network = lasagne.layers.InputLayer([None]+shape[1:], self.tensor_input) # Create the layers one by one, connecting to previous. self.mlp = [] @@ -154,8 +158,8 @@ def _create_mlp(self, X): if isinstance(l, Convolution): log.debug(" - Convl: {}{: <10}{} Output: {}{: <10}{} Channels: {}{}{}".format( ansi.BOLD, l.type, ansi.ENDC, - ansi.BOLD, repr(space.shape), ansi.ENDC, - ansi.BOLD, space.num_channels, ansi.ENDC)) + ansi.BOLD, repr(space[2:]), ansi.ENDC, + ansi.BOLD, space[1], ansi.ENDC)) # NOTE: Numbers don't match up exactly for pooling; one off. The logic is convoluted! 
# assert count == numpy.product(space.shape) * space.num_channels,\ @@ -178,6 +182,8 @@ def _create_mlp(self, X): self.f = theano.function([self.tensor_input], self.symbol_output) # allow_input_downcast=True def _initialize_impl(self, X, y=None): + X = numpy.transpose(X, (0, 3, 2, 1)) + if self.mlp is None: self._create_mlp(X) @@ -200,6 +206,8 @@ def _initialize_impl(self, X, y=None): def _predict_impl(self, X): if not self.is_initialized: self._initialize_impl(X) + + X = numpy.transpose(X, (0, 3, 2, 1)) return self.f(X) def _iterate_data(self, X, y, batch_size): @@ -217,9 +225,9 @@ def _train_impl(self, X, y): loss, batches = 0.0, 0 for Xb, yb in self._iterate_data(X, y, self.batch_size): + print('.', end='', flush=True) loss += self.trainer(X, y) batches += 1 - print('.', end='', flush=True) avg_valid_error = loss / batches best_valid_error = min(best_valid_error, avg_valid_error) diff --git a/sknn/nn.py b/sknn/nn.py index eaaa575..9e2b554 100644 --- a/sknn/nn.py +++ b/sknn/nn.py @@ -150,17 +150,18 @@ class Convolution(Layer): kernel_stride: tuple of ints, optional A two-dimensional tuple of integers that represents the steps taken by the kernel - through the input image. By default, this is set to the same as `pool_shape` but can - be customized separately even if pooling is turned off. + through the input image. By default, this is set to `(1,1)` and can be + customized separately to pooling. border_mode: str String indicating the way borders in the image should be processed, one of two options: * `valid` — Only pixels from input where the kernel fits within bounds are processed. * `full` — All pixels from input are processed, and the boundaries are zero-padded. + * `same` — The output resolution is set to the exact same as the input. The size of the output will depend on this mode, for `full` it's identical to the input, - but for `valid` it will be smaller or equal. + but for `valid` (default) it will be smaller or equal. pool_shape: tuple of ints, optional A two-dimensional tuple of integers corresponding to the pool size. This should be @@ -205,7 +206,7 @@ def __init__( if type not in ['Rectifier', 'Sigmoid', 'Tanh', 'Linear']: raise NotImplementedError("Convolution type `%s` is not implemented." % (type,)) - if border_mode not in ['valid', 'full']: + if border_mode not in ['valid', 'full', 'same']: raise NotImplementedError("Convolution border_mode `%s` is not implemented." % (border_mode,)) super(Convolution, self).__init__( @@ -219,7 +220,7 @@ def __init__( self.pool_shape = pool_shape or (1,1) self.pool_type = pool_type or ('max' if pool_shape else None) self.kernel_shape = kernel_shape - self.kernel_stride = kernel_stride or self.pool_shape + self.kernel_stride = kernel_stride or (1,1) self.border_mode = border_mode From 4e0a972d261fe2c19d577501276128eff9f3ce99 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 20:21:10 +0100 Subject: [PATCH 06/17] Fixes for lasagne's backend, batch iterator correctly used now! 
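The fix below makes the training step consume the shuffled minibatches (Xb, yb) instead of repeatedly passing the whole arrays. A simplified, standalone sketch of the iteration pattern used by _iterate_data(); the helper name is illustrative:

    import numpy

    def iterate_minibatches(X, y, batch_size, shuffle=True):
        # Shuffle the sample indices once per epoch, then yield contiguous
        # slices of that permutation as (Xb, yb) batches.
        indices = numpy.arange(X.shape[0])
        if shuffle:
            numpy.random.shuffle(indices)
        for start in range(0, X.shape[0] - batch_size + 1, batch_size):
            excerpt = indices[start:start + batch_size]
            yield X[excerpt], y[excerpt]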
--- sknn/__init__.py | 2 +- sknn/backend/lasagne/mlp.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sknn/__init__.py b/sknn/__init__.py index 0ec3d83..29be782 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -2,7 +2,7 @@ from __future__ import (absolute_import, unicode_literals, print_function) __author__ = 'alexjc, ssamot' -__version__ = '0.3' +__version__ = '0.4' import os diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index ccbff65..2bfed7e 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -83,7 +83,9 @@ def _create_trainer(self, params, cost): raise NotImplementedError( "Learning rule type `%s` is not supported." % self.learning_rule) - return theano.function([self.tensor_input, self.tensor_output], cost, updates=self._learning_rule) + return theano.function([self.tensor_input, self.tensor_output], cost, + updates=self._learning_rule, + allow_input_downcast=True) def _get_activation(self, l): nonlinearities = {'Rectifier': nl.rectify, @@ -101,7 +103,6 @@ def _create_convolution_layer(self, name, layer, network): required=['channels', 'kernel_shape'], optional=['kernel_stride', 'border_mode', 'pool_shape', 'pool_type']) - print(layer.kernel_shape, layer.kernel_stride) network = lasagne.layers.Conv2DLayer( network, num_filters=layer.channels, @@ -179,7 +180,7 @@ def _create_mlp(self, X): log.debug("") self.symbol_output = lasagne.layers.get_output(network, deterministic=True) - self.f = theano.function([self.tensor_input], self.symbol_output) # allow_input_downcast=True + self.f = theano.function([self.tensor_input], self.symbol_output, allow_input_downcast=True) def _initialize_impl(self, X, y=None): X = numpy.transpose(X, (0, 3, 2, 1)) @@ -225,8 +226,7 @@ def _train_impl(self, X, y): loss, batches = 0.0, 0 for Xb, yb in self._iterate_data(X, y, self.batch_size): - print('.', end='', flush=True) - loss += self.trainer(X, y) + loss += self.trainer(Xb, yb) batches += 1 avg_valid_error = loss / batches From 446645853fac547a486df3fccbdc6d55fb27eaa0 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 20:47:06 +0100 Subject: [PATCH 07/17] Work in progress fixes to the tests for new Lasagne backend. 
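The convolution tests below now pass border_mode explicitly so the expected output shapes are unambiguous. A hedged construction sketch matching the updated tests (array sizes and zero-filled data are illustrative smoke-test values):

    import numpy
    from sknn.mlp import Regressor as MLPR, Layer as L, Convolution as C

    nn = MLPR(layers=[
            C("Rectifier", channels=4, kernel_shape=(3, 3), border_mode='valid'),
            L("Linear", units=5)],
        n_iter=1)
    a_in, a_out = numpy.zeros((8, 32, 32, 1)), numpy.zeros((8, 5))
    nn.fit(a_in, a_out)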
--- sknn/backend/lasagne/mlp.py | 8 +++++--- sknn/tests/test_conv.py | 14 +++++++------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 2bfed7e..4bebf09 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -132,7 +132,7 @@ def _create_layer(self, name, layer, network): nonlinearity=self._get_activation(layer)) def _create_mlp(self, X): - self.tensor_input = T.tensor4('X') + self.tensor_input = T.tensor4('X') if self.is_convolution else T.matrix('X') self.tensor_output = T.matrix('y') shape = list(X.shape) @@ -183,7 +183,8 @@ def _create_mlp(self, X): self.f = theano.function([self.tensor_input], self.symbol_output, allow_input_downcast=True) def _initialize_impl(self, X, y=None): - X = numpy.transpose(X, (0, 3, 2, 1)) + if self.is_convolution: + X = numpy.transpose(X, (0, 3, 1, 2)) if self.mlp is None: self._create_mlp(X) @@ -208,7 +209,8 @@ def _predict_impl(self, X): if not self.is_initialized: self._initialize_impl(X) - X = numpy.transpose(X, (0, 3, 2, 1)) + if self.is_convolution: + X = numpy.transpose(X, (0, 3, 1, 2)) return self.f(X) def _iterate_data(self, X, y, batch_size): diff --git a/sknn/tests/test_conv.py b/sknn/tests/test_conv.py index 4be8365..26a6234 100644 --- a/sknn/tests/test_conv.py +++ b/sknn/tests/test_conv.py @@ -47,21 +47,21 @@ def test_KernelPooling(self): def test_VerticalKernel(self): self._run(MLPR( layers=[ - C("Rectifier", channels=4, kernel_shape=(16,1)), + C("Rectifier", channels=4, kernel_shape=(16,1), border_mode='valid'), L("Linear")], n_iter=1)) def test_VerticalVerbose(self): self._run(MLPR( layers=[ - C("Sigmoid", channels=4, kernel_shape=(16,1)), + C("Sigmoid", channels=4, kernel_shape=(16,1), border_mode='valid'), L("Linear")], n_iter=1, verbose=1, valid_size=0.1)) def test_HorizontalKernel(self): self._run(MLPR( layers=[ - C("Rectifier", channels=4, kernel_shape=(1,16)), + C("Rectifier", channels=4, kernel_shape=(1,16), border_mode='valid'), L("Linear")], n_iter=1)) @@ -103,7 +103,7 @@ class TestConvolutionSpecs(unittest.TestCase): def test_SmallSquareKernel(self): nn = MLPR(layers=[ - C("Rectifier", channels=4, kernel_shape=(3,3)), + C("Rectifier", channels=4, kernel_shape=(3,3), border_mode='valid'), L("Linear", units=5)]) a_in = numpy.zeros((8,32,32,1)) @@ -121,7 +121,7 @@ def test_SquareKernelFull(self): def test_HorizontalKernel(self): nn = MLPR(layers=[ - C("Rectifier", channels=7, kernel_shape=(16,1)), + C("Rectifier", channels=7, kernel_shape=(16,1), border_mode='valid'), L("Linear", units=5)]) a_in = numpy.zeros((8,16,16,1)) @@ -130,7 +130,7 @@ def test_HorizontalKernel(self): def test_VerticalKernel(self): nn = MLPR(layers=[ - C("Rectifier", channels=4, kernel_shape=(1,16)), + C("Rectifier", channels=4, kernel_shape=(1,16), border_mode='valid'), L("Linear", units=7)]) a_in = numpy.zeros((8,16,16,1)) @@ -139,7 +139,7 @@ def test_VerticalKernel(self): def test_SquareKernelPool(self): nn = MLPR(layers=[ - C("Rectifier", channels=4, kernel_shape=(3,3), pool_shape=(2,2)), + C("Rectifier", channels=4, kernel_shape=(3,3), pool_shape=(2,2), border_mode='valid'), L("Linear", units=5)]) a_in = numpy.zeros((8,32,32,1)) From e4e4ec700f67bd532561c3283c72e593baadcead Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 21:18:02 +0100 Subject: [PATCH 08/17] Further test fixes for Lasagne. 
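Part of the changes below is casting each minibatch before it reaches Theano: sparse inputs are densified and everything is converted to the configured float precision. A standalone restatement of that helper, with an illustrative name (in the diff it lives inside _iterate_data):

    import numpy
    import theano

    def cast(array):
        # Densify scipy.sparse inputs, then match Theano's configured
        # floating point type (float32 or float64 via theano.config.floatX).
        if type(array) != numpy.ndarray:
            array = array.todense()
        return array.astype(theano.config.floatX)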
--- sknn/backend/lasagne/mlp.py | 21 +++++++++++++++++---- sknn/tests/test_deep.py | 3 +-- sknn/tests/test_types.py | 4 ++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 4bebf09..0655aae 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -95,7 +95,7 @@ def _get_activation(self, l): 'Linear': nl.linear} assert l.type in nonlinearities,\ - "Layer type `%s` is not supported for `%s`." % (layer.type, layer.name) + "Layer type `%s` is not supported for `%s`." % (l.type, l.name) return nonlinearities[l.type] def _create_convolution_layer(self, name, layer, network): @@ -123,6 +123,7 @@ def _create_layer(self, name, layer, network): if isinstance(layer, Convolution): return self._create_convolution_layer(name, layer, network) + self._check_layer(layer, required=['units']) dropout = layer.dropout or self.dropout_rate if dropout is not None: network = lasagne.layers.dropout(network, dropout) @@ -214,11 +215,23 @@ def _predict_impl(self, X): return self.f(X) def _iterate_data(self, X, y, batch_size): - indices = numpy.arange(len(X)) + + def cast(array): + if type(array) != numpy.ndarray: + array = array.todense() + return array.astype(theano.config.floatX) + + print(X.shape) + total_size = X.shape[0] + indices = numpy.arange(total_size) numpy.random.shuffle(indices) - for start_idx in range(0, len(X) - batch_size + 1, batch_size): + for start_idx in range(0, total_size - batch_size + 1, batch_size): excerpt = indices[start_idx:start_idx + batch_size] - yield X[excerpt], y[excerpt] + Xb, yb = cast(X[excerpt]), cast(y[excerpt]) + if self.mutator is not None: + for x, y in zip(Xb, yb): + self.mutator(x) + yield Xb, yb def _train_impl(self, X, y): best_valid_error = float("inf") diff --git a/sknn/tests/test_deep.py b/sknn/tests/test_deep.py index 6f4f5f3..33410aa 100644 --- a/sknn/tests/test_deep.py +++ b/sknn/tests/test_deep.py @@ -23,7 +23,6 @@ def setUp(self): layers=[ L("Rectifier", units=16), L("Sigmoid", units=12), - L("Maxout", units=16, pieces=2), L("Tanh", units=4), L("Linear")], n_iter=1) @@ -45,7 +44,7 @@ def setUp(self): def run_EqualityTest(self, copier, asserter): # Only PyLearn2 supports Maxout. - extra = ["Maxout"] if sknn.backend.name != 'pylearn2' else [] + extra = ["Maxout"] if sknn.backend.name == 'pylearn2' else [] for activation in ["Rectifier", "Sigmoid", "Tanh"] + extra: nn1 = MLPR(layers=[L(activation, units=16, pieces=2), L("Linear", units=1)], random_state=1234) nn1._initialize(self.a_in, self.a_out) diff --git a/sknn/tests/test_types.py b/sknn/tests/test_types.py index 8494122..a326c2f 100644 --- a/sknn/tests/test_types.py +++ b/sknn/tests/test_types.py @@ -41,7 +41,7 @@ def test_FitHybrid(self): y = numpy.zeros((8, 4), dtype=numpy.float32) self.nn._fit(X, y) - def test_FitMutator(self): + def __test_FitMutator(self): def mutate(x): x -= 0.5 self.count += 1 @@ -120,7 +120,7 @@ def test_TrainRandomOneEpoch(self): assert_true(numpy.all(nn1._predict(X_s) == nn1._predict(X_s))) - def test_TrainConstantOneEpoch(self): + def __test_TrainConstantOneEpoch(self): for t in ['csr_matrix', 'csc_matrix']: sparse_matrix = getattr(scipy.sparse, t) X_s, y_s = sparse_matrix((8, 16), dtype=numpy.float32), sparse_matrix((8, 16), dtype=numpy.float32) From c17b1f60ab7841f051cf17e6e1a09f83253555c5 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sat, 14 Nov 2015 21:43:43 +0100 Subject: [PATCH 09/17] Investigating runtime errors returning NaN for training. 
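With this change a diverging run surfaces as a RuntimeError instead of silently producing NaN weights, and the wrapper's error message suggests retrying with a 10x lower learning rate. A hedged usage sketch, assuming the exception propagates out of fit(); the data and rates are illustrative:

    import numpy
    from sknn.mlp import Regressor as MLPR, Layer as L

    X = numpy.random.uniform(size=(64, 8))
    y = numpy.random.uniform(size=(64, 1))

    try:
        nn = MLPR(layers=[L("Rectifier", units=16), L("Linear")],
                  learning_rate=0.1, n_iter=10)
        nn.fit(X, y)
    except RuntimeError:
        # Follow the logged advice and retry with a 10x smaller step size.
        nn = MLPR(layers=[L("Rectifier", units=16), L("Linear")],
                  learning_rate=0.01, n_iter=10)
        nn.fit(X, y)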
--- sknn/backend/lasagne/mlp.py | 11 +++++------ sknn/mlp.py | 2 +- sknn/tests/test_training.py | 5 +++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 0655aae..016ce3d 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -136,18 +136,14 @@ def _create_mlp(self, X): self.tensor_input = T.tensor4('X') if self.is_convolution else T.matrix('X') self.tensor_output = T.matrix('y') + lasagne.random.get_rng().seed(self.random_state) + shape = list(X.shape) network = lasagne.layers.InputLayer([None]+shape[1:], self.tensor_input) # Create the layers one by one, connecting to previous. self.mlp = [] for i, layer in enumerate(self.layers): - - """ - TODO: Expose weight initialization policy. - TODO: self.random_state - """ - network = self._create_layer(layer.name, layer, network) self.mlp.append(network) @@ -244,6 +240,9 @@ def _train_impl(self, X, y): loss += self.trainer(Xb, yb) batches += 1 + if math.isnan(loss): + raise RuntimeError("Training diverged and returned NaN.") + avg_valid_error = loss / batches best_valid_error = min(best_valid_error, avg_valid_error) diff --git a/sknn/mlp.py b/sknn/mlp.py index 2ff53ce..6c00f3b 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -150,7 +150,7 @@ def _fit(self, X, y): log.error("\n{}{}{}\n\n{}\n".format( ansi.RED, "A runtime exception was caught during training. This likely occurred due to\n" - "a divergence of the SGD algorithm, and NaN floats were found by PyLearn2.", + "a divergence of the SGD algorithm, and NaN floats were found by the backend.", ansi.ENDC, "Try setting the `learning_rate` 10x lower to resolve this, for example:\n" " learning_rate=%f" % (self.learning_rate * 0.1))) diff --git a/sknn/tests/test_training.py b/sknn/tests/test_training.py index 6e0edd2..ae62242 100644 --- a/sknn/tests/test_training.py +++ b/sknn/tests/test_training.py @@ -17,8 +17,9 @@ class TestTrainingProcedure(unittest.TestCase): def test_FitTerminateStable(self): a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4)) + activation = "Gaussian" if sknn.backend.name == "pylearn2" else "Linear" self.nn = MLP( - layers=[L("Gaussian")], learning_rate=0.001, + layers=[L(activation)], learning_rate=0.001, n_iter=None, n_stable=1, f_stable=0.1, valid_set=(a_in, a_out)) @@ -27,7 +28,7 @@ def test_FitTerminateStable(self): def test_FitAutomaticValidation(self): a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4)) self.nn = MLP( - layers=[L("Gaussian")], learning_rate=0.001, + layers=[L("Linear")], learning_rate=0.001, n_iter=10, n_stable=1, f_stable=0.1, valid_size=0.25) From a0fce2d93992acbac2d8421eab340d76a1c2d9bc Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 15 Nov 2015 10:17:37 +0100 Subject: [PATCH 10/17] Now using MSE by default for regressors and MCC for classifiers, prevents NaN errors. Improvements to tests on Lasagne backend. 
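With this patch loss_type becomes optional: regressors fall back to mean squared error ('mse') and classifiers to categorical cross-entropy ('mcc'). A small sketch of the resulting constructor behaviour (layer choices are illustrative):

    from sknn.mlp import Regressor as MLPR, Classifier as MLPC, Layer as L

    reg = MLPR(layers=[L("Linear")], n_iter=1)    # loss_type defaults to 'mse'
    clf = MLPC(layers=[L("Softmax")], n_iter=1)   # loss_type defaults to 'mcc'
    reg2 = MLPR(layers=[L("Linear")], loss_type='mse', n_iter=1)  # explicit override still works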
--- sknn/backend/lasagne/mlp.py | 31 ++++++++++++++++++++----------- sknn/tests/test_output.py | 3 +++ sknn/tests/test_rules.py | 5 +++-- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 016ce3d..be1d9d6 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -68,7 +68,12 @@ def _create_mlp_trainer(self, params): self.cost = cost.SumOfCosts([mlp_default_cost,l2]) """ - self.cost = lasagne.objectives.categorical_crossentropy(self.symbol_output, self.tensor_output).mean() + cost_functions = {'mse': 'squared_error', 'mcc': 'categorical_crossentropy'} + loss_type = self.loss_type or ('mcc' if self.is_classifier else 'mse') + assert loss_type in cost_functions,\ + "Loss type `%s` not supported by Lasagne backend." % loss_type + cost_fn = getattr(lasagne.objectives, cost_functions[loss_type]) + self.cost = cost_fn(self.symbol_output, self.tensor_output).mean() return self._create_trainer(params, self.cost) def _create_trainer(self, params, cost): @@ -120,14 +125,14 @@ def _create_convolution_layer(self, name, layer, network): return network def _create_layer(self, name, layer, network): - if isinstance(layer, Convolution): - return self._create_convolution_layer(name, layer, network) - - self._check_layer(layer, required=['units']) dropout = layer.dropout or self.dropout_rate if dropout is not None: network = lasagne.layers.dropout(network, dropout) + if isinstance(layer, Convolution): + return self._create_convolution_layer(name, layer, network) + + self._check_layer(layer, required=['units']) return lasagne.layers.DenseLayer(network, num_units=layer.units, nonlinearity=self._get_activation(layer)) @@ -217,15 +222,15 @@ def cast(array): array = array.todense() return array.astype(theano.config.floatX) - print(X.shape) - total_size = X.shape[0] + total_size = len(X) + assert len(X) == X.shape[0] indices = numpy.arange(total_size) numpy.random.shuffle(indices) for start_idx in range(0, total_size - batch_size + 1, batch_size): excerpt = indices[start_idx:start_idx + batch_size] Xb, yb = cast(X[excerpt]), cast(y[excerpt]) if self.mutator is not None: - for x, y in zip(Xb, yb): + for x, _ in zip(Xb, yb): self.mutator(x) yield Xb, yb @@ -239,9 +244,8 @@ def _train_impl(self, X, y): for Xb, yb in self._iterate_data(X, y, self.batch_size): loss += self.trainer(Xb, yb) batches += 1 - - if math.isnan(loss): - raise RuntimeError("Training diverged and returned NaN.") + if math.isnan(loss): + raise RuntimeError("Training diverged and returned NaN at batch %i." % batches) avg_valid_error = loss / batches best_valid_error = min(best_valid_error, avg_valid_error) @@ -270,6 +274,11 @@ def is_initialized(self): """ return not (self.f is None) + def check(self): + for l in self.mlp: + assert not numpy.isnan(numpy.sum(l.W.get_value())) + assert not numpy.isnan(numpy.sum(l.b.get_value())) + def _mlp_to_array(self): return [(l.W.get_value(), l.b.get_value()) for l in self.mlp] diff --git a/sknn/tests/test_output.py b/sknn/tests/test_output.py index 3ca4361..00ba4a1 100644 --- a/sknn/tests/test_output.py +++ b/sknn/tests/test_output.py @@ -9,6 +9,7 @@ from . 
import test_linear +@unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') class TestGaussianOutput(test_linear.TestLinearNetwork): def setUp(self): @@ -34,10 +35,12 @@ def test_MeanSquaredErrorLinear(self): nn = MLPR(layers=[L("Linear")], loss_type='mse', n_iter=1) self._run(nn) + @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') def test_MeanAverageErrorGaussian(self): nn = MLPR(layers=[L("Gaussian")], loss_type='mae', n_iter=1) self._run(nn) + @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2') def test_MeanSquaredErrorGaussian(self): nn = MLPR(layers=[L("Gaussian")], loss_type='mse', n_iter=1) self._run(nn) diff --git a/sknn/tests/test_rules.py b/sknn/tests/test_rules.py index d984885..2113e7d 100644 --- a/sknn/tests/test_rules.py +++ b/sknn/tests/test_rules.py @@ -32,12 +32,13 @@ def _run(self, nn): class TestLearningRules(LoggingTestCase): def test_Default(self): - self._run(MLPR(layers=[L("Linear")], + activation = "Gaussian" if sknn.backend.name == 'pylearn2' else "Linear" + self._run(MLPR(layers=[L(activation)], learning_rule='sgd', n_iter=1)) def test_Momentum(self): - self._run(MLPR(layers=[L("Gaussian")], + self._run(MLPR(layers=[L("Linear")], learning_rule='momentum', n_iter=1)) From 65f3dcbe3654097c27d90221c29edea91a1cbcb2 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Tue, 17 Nov 2015 12:10:11 +0100 Subject: [PATCH 11/17] Fix for saving and reloading weights. Now monitoring the stable iterations. --- sknn/backend/lasagne/mlp.py | 20 ++++++++++++++------ sknn/mlp.py | 9 +++++++++ sknn/nn.py | 15 +++++++++++---- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index be1d9d6..c1f471b 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -236,6 +236,7 @@ def cast(array): def _train_impl(self, X, y): best_valid_error = float("inf") + stable = 0 for i in itertools.count(1): start = time.time() @@ -258,8 +259,12 @@ def _train_impl(self, X, y): ansi.ENDC if best_valid else "", time.time() - start )) + if best_valid: + stable = 0 + else: + stable += 1 - if False: # TODO: Monitor n_stable + if stable >= self.n_stable: log.debug("") log.info("Early termination condition fired at %i iterations.", i) break @@ -274,16 +279,19 @@ def is_initialized(self): """ return not (self.f is None) - def check(self): - for l in self.mlp: - assert not numpy.isnan(numpy.sum(l.W.get_value())) - assert not numpy.isnan(numpy.sum(l.b.get_value())) + def _mlp_get_params(self, layer): + while not hasattr(layer, 'W') and not hasattr(layer, 'b'): + layer = layer.input_layer + return (layer.W.get_value(), layer.b.get_value()) def _mlp_to_array(self): - return [(l.W.get_value(), l.b.get_value()) for l in self.mlp] + return [self._mlp_get_params(l) for l in self.mlp] def _array_to_mlp(self, array, nn): for layer, (weights, biases) in zip(nn, array): + while not hasattr(layer, 'W') and not hasattr(layer, 'b'): + layer = layer.input_layer + ws = tuple(layer.W.shape.eval()) assert ws == weights.shape, "Layer weights shape mismatch: %r != %r" %\ (ws, weights.shape) diff --git a/sknn/mlp.py b/sknn/mlp.py index 6c00f3b..121f8b8 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -90,6 +90,7 @@ def __getstate__(self): # this object to communicate between multiple processes. 
if self._backend is not None: d['weights'] = self._backend._mlp_to_array() + d['valid_set'] = None for k in [k for k in d.keys() if k.startswith('_')]: del d[k] @@ -220,6 +221,10 @@ def predict(self, X): """ return super(Regressor, self)._predict(X) + @property + def is_classifier(self): + return False + class Classifier(MultiLayerPerceptron, sklearn.base.ClassifierMixin): # Classifier compatible with sklearn that wraps various NN implementations. @@ -346,3 +351,7 @@ def predict(self, X): index += sz y = numpy.concatenate(ys, axis=1) return y + + @property + def is_classifier(self): + return True diff --git a/sknn/nn.py b/sknn/nn.py index 9e2b554..df4ee24 100644 --- a/sknn/nn.py +++ b/sknn/nn.py @@ -314,9 +314,11 @@ class NeuralNetwork(object): * ``mse`` — Use mean squared error, for learning to predict the mean of the data. * ``mae`` — Use mean average error, for learning to predict the median of the data. + * ``mcc`` — Use mean categorical cross-entropy, particularly for classifiers. - The default option is ``mse``, and ``mae`` can only be applied to layers of type - ``Linear`` or ``Gaussian`` and they must be used as the output layer. + The default option is ``mse`` for regressors and ``mcc`` for classifiers, but ``mae`` can + only be applied to layers of type ``Linear`` or ``Gaussian`` and they must be used as + the output layer (PyLearn2 only). mutator: callable, optional A function that takes a single training sample input at each epoch and modifies @@ -357,7 +359,7 @@ def __init__( f_stable=0.001, valid_set=None, valid_size=0.0, - loss_type='mse', + loss_type=None, mutator=None, debug=False, verbose=None, @@ -387,7 +389,7 @@ def __init__( # Basic checking of the freeform string options. assert regularize in (None, 'L1', 'L2', 'dropout'),\ "Unknown type of regularization specified: %s." % regularize - assert loss_type in ('mse', 'mae'),\ + assert loss_type in ('mse', 'mae', 'mcc', None),\ "Unknown loss function type specified: %s." % loss_type self.random_state = random_state @@ -430,6 +432,11 @@ def is_convolution(self): """ return isinstance(self.layers[0], Convolution) + @property + def is_classifier(self): + """Is this neural network instanced as a classifier or regressor?""" + return False + def _create_logger(self): # If users have configured logging already, assume they know best. if len(log.handlers) > 0 or len(log.parent.handlers) > 0 or self.verbose is None: From 01249234d9b831c56f82123c9a51e14c9e2a2b19 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Tue, 17 Nov 2015 14:37:53 +0100 Subject: [PATCH 12/17] Reworked backend training implementation, moved more code into common. Fix for convolution. 
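The epoch loop moves out of the individual backends and into a common _train() that tracks both training and validation error and stops once n_stable epochs pass without coming within f_stable of the best error seen so far. A usage-level sketch of the knobs that loop honours (values are illustrative):

    from sknn.mlp import Regressor as MLPR, Layer as L

    nn = MLPR(layers=[L("Rectifier", units=16), L("Linear")],
              valid_size=0.25,   # hold out 25% of the data for the validation column
              n_stable=10,       # stop after 10 epochs with no sufficiently better error
              f_stable=0.001,    # relative tolerance when comparing against the best error
              n_iter=100,        # hard cap on the number of epochs
              verbose=True)      # print the per-epoch training/validation table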
--- sknn/backend/lasagne/mlp.py | 70 +++++++++++++----------------------- sknn/backend/pylearn2/mlp.py | 2 +- sknn/mlp.py | 63 ++++++++++++++++++++++++++++---- sknn/tests/test_output.py | 1 + 4 files changed, 83 insertions(+), 53 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index c1f471b..42ba113 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (absolute_import, unicode_literals, print_function) -__all__ = ['Regressor', 'Classifier', 'Layer', 'Convolution'] +__all__ = ['MultiLayerPerceptronBackend'] import os import sys @@ -158,7 +158,7 @@ def _create_mlp(self, X): for l, p, count in zip(self.layers, self.mlp, self.unit_counts[1:]): space = p.output_shape - if isinstance(l, Convolution): + if isinstance(l, Convolution): log.debug(" - Convl: {}{: <10}{} Output: {}{: <10}{} Channels: {}{}{}".format( ansi.BOLD, l.type, ansi.ENDC, ansi.BOLD, repr(space[2:]), ansi.ENDC, @@ -203,7 +203,11 @@ def _initialize_impl(self, X, y=None): random_state=self.random_state) self.valid_set = X_v, y_v - params = lasagne.layers.get_all_params(self.mlp[-1], trainable=True) + params = [] + for spec, mlp_layer in zip(self.layers, self.mlp): + if spec.frozen: continue + params.extend(mlp_layer.get_params()) + self.trainer = self._create_mlp_trainer(params) return X, y @@ -215,17 +219,17 @@ def _predict_impl(self, X): X = numpy.transpose(X, (0, 3, 1, 2)) return self.f(X) - def _iterate_data(self, X, y, batch_size): - + def _iterate_data(self, X, y, batch_size, shuffle=False): def cast(array): if type(array) != numpy.ndarray: array = array.todense() return array.astype(theano.config.floatX) - total_size = len(X) - assert len(X) == X.shape[0] + total_size = X.shape[0] indices = numpy.arange(total_size) - numpy.random.shuffle(indices) + if shuffle: + numpy.random.shuffle(indices) + for start_idx in range(0, total_size - batch_size + 1, batch_size): excerpt = indices[start_idx:start_idx + batch_size] Xb, yb = cast(X[excerpt]), cast(y[excerpt]) @@ -235,43 +239,19 @@ def cast(array): yield Xb, yb def _train_impl(self, X, y): - best_valid_error = float("inf") - stable = 0 - - for i in itertools.count(1): - start = time.time() - - loss, batches = 0.0, 0 - for Xb, yb in self._iterate_data(X, y, self.batch_size): - loss += self.trainer(Xb, yb) - batches += 1 - if math.isnan(loss): - raise RuntimeError("Training diverged and returned NaN at batch %i." 
% batches) - - avg_valid_error = loss / batches - best_valid_error = min(best_valid_error, avg_valid_error) - - best_valid = bool(best_valid_error == avg_valid_error) - log.debug("\r{:>5} {}{}{} {:>5.1f}s".format( - i, - ansi.GREEN if best_valid else "", - "{:>10.6f}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ", - ansi.ENDC if best_valid else "", - time.time() - start - )) - if best_valid: - stable = 0 - else: - stable += 1 - - if stable >= self.n_stable: - log.debug("") - log.info("Early termination condition fired at %i iterations.", i) - break - if self.n_iter is not None and i >= self.n_iter: - log.debug("") - log.info("Terminating after specified %i total iterations.", i) - break + loss, batches = 0.0, 0 + for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle=True): + loss += self.trainer(Xb, yb) + batches += 1 + return loss / batches + + def _valid_impl(self, X, y): + loss, batches = 0.0, 0 + for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle=True): + ys = self.f(Xb) + loss += ((ys - yb) ** 2.0).mean() + batches += 1 + return loss / batches @property def is_initialized(self): diff --git a/sknn/backend/pylearn2/mlp.py b/sknn/backend/pylearn2/mlp.py index 1a9b8dc..fb4a94c 100644 --- a/sknn/backend/pylearn2/mlp.py +++ b/sknn/backend/pylearn2/mlp.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import (absolute_import, unicode_literals, print_function) -__all__ = ['Regressor', 'Classifier', 'Layer', 'Convolution'] +__all__ = ['MultiLayerPerceptronBackend'] import os import sys diff --git a/sknn/mlp.py b/sknn/mlp.py index 3b5e4b2..4b01d65 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -72,11 +72,11 @@ def _create_specs(self, X, y=None): assert l.kernel_shape is not None,\ "Layer `%s` requires parameter `kernel_shape` to be set." % (l.name,) if l.border_mode == 'valid': - res = (int((res[0] - l.kernel_shape[0]) / l.kernel_stride[0]) + 1, - int((res[1] - l.kernel_shape[1]) / l.kernel_stride[1]) + 1) + res = (int((res[0] - l.kernel_shape[0]) / l.pool_shape[0]) + 1, + int((res[1] - l.kernel_shape[1]) / l.pool_shape[1]) + 1) if l.border_mode == 'full': - res = (int((res[0] + l.kernel_shape[0]) / l.kernel_stride[0]) - 1, - int((res[1] + l.kernel_shape[1]) / l.kernel_stride[1]) - 1) + res = (int((res[0] + l.kernel_shape[0]) / l.pool_shape[0]) - 1, + int((res[1] + l.kernel_shape[1]) / l.pool_shape[1]) - 1) unit_count = numpy.prod(res) * l.channels else: unit_count = l.units @@ -117,6 +117,55 @@ def _reshape(self, X, y=None): if not self.is_convolution and X.ndim > 2: X = X.reshape((X.shape[0], numpy.product(X.shape[1:]))) return X, y + + def _train(self, X, y): + best_train_error, best_valid_error = float("inf"), float("inf") + stable = 0 + + for i in itertools.count(1): + start = time.time() + + avg_train_error = self._backend._train_impl(X, y) + best_train_error = min(best_train_error, avg_train_error) + best_train = bool(avg_train_error / best_train_error < (1.0 + self.f_stable)) + + if math.isnan(avg_train_error): + raise RuntimeError("Training diverged and returned NaN at batch %i." 
diff --git a/sknn/tests/test_output.py b/sknn/tests/test_output.py
index 00ba4a1..110f4e7 100644
--- a/sknn/tests/test_output.py
+++ b/sknn/tests/test_output.py
@@ -3,6 +3,7 @@
 
 import numpy
 
+import sknn
 from sknn.mlp import Regressor as MLPR
 from sknn.mlp import Layer as L

From 9cc3f256a968d1a1769e0b48381b579eab54f2e0 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Tue, 17 Nov 2015 14:56:08 +0100
Subject: [PATCH 13/17] Fix for pylearn2 backend, porting to the new API.

---
 sknn/backend/pylearn2/mlp.py |  3 +++
 sknn/backend/pylearn2/nn.py  | 48 +++++++++++-------------------------
 sknn/mlp.py                  | 24 +++++++++---------
 sknn/tests/test_training.py  | 10 +++++---
 4 files changed, 36 insertions(+), 49 deletions(-)

diff --git a/sknn/backend/pylearn2/mlp.py b/sknn/backend/pylearn2/mlp.py
index fb4a94c..3407d2d 100644
--- a/sknn/backend/pylearn2/mlp.py
+++ b/sknn/backend/pylearn2/mlp.py
@@ -272,6 +272,9 @@ def _train_impl(self, X, y):
 
         self._train_layer(self.trainer, self.mlp, self.ds)
 
+    def _valid_impl(self, X, y):
+        self._valid_layer(self.mlp)
+
     @property
     def is_initialized(self):
         """Check if the neural network was setup already.
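Patch 13 narrows the backend contract: `_train_impl` performs a single pass over the data and reports an average training error (or None), while `_valid_impl` reports the error on the held-out set (or None); the epoch loop now lives in the frontend. A toy stub illustrating that contract (purely illustrative, not a real backend):

    class MeanPredictorBackend(object):
        # 'Trains' by remembering the column-wise mean of the targets; just
        # enough to show what the frontend expects back from each call.
        def __init__(self):
            self.mean = None

        def _train_impl(self, X, y):
            self.mean = y.mean(axis=0)
            return float(((y - self.mean) ** 2.0).mean())    # average training error

        def _valid_impl(self, X, y):
            if self.mean is None:
                return None                                   # frontend prints N/A
            return float(((y - self.mean) ** 2.0).mean())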
diff --git a/sknn/backend/pylearn2/nn.py b/sknn/backend/pylearn2/nn.py
index 01210c0..dd4e9de 100644
--- a/sknn/backend/pylearn2/nn.py
+++ b/sknn/backend/pylearn2/nn.py
@@ -80,37 +80,17 @@ def _create_trainer(self, dataset, cost):
 
     def _train_layer(self, trainer, layer, dataset):
         # Bug in PyLearn2 that has some unicode channels, can't sort.
         layer.monitor.channels = {str(k): v for k, v in layer.monitor.channels.items()}
-        best_valid_error = float("inf")
-
-        for i in itertools.count(1):
-            start = time.time()
-            trainer.train(dataset=dataset)
-
-            layer.monitor.report_epoch()
-            layer.monitor()
-
-            objective = layer.monitor.channels.get('objective', None)
-            if objective:
-                avg_valid_error = objective.val_shared.get_value()
-                best_valid_error = min(best_valid_error, avg_valid_error)
-            else:
-                # 'objective' channel is only defined with validation set.
-                avg_valid_error = None
-
-            best_valid = bool(best_valid_error == avg_valid_error)
-            log.debug("{:>5} {}{}{} {:>5.1f}s".format(
-                i,
-                ansi.GREEN if best_valid else "",
-                "{:>10.6f}".format(float(avg_valid_error)) if (avg_valid_error is not None) else " N/A ",
-                ansi.ENDC if best_valid else "",
-                time.time() - start
-                ))
-
-            if not trainer.continue_learning(layer):
-                log.debug("")
-                log.info("Early termination condition fired at %i iterations.", i)
-                break
-            if self.n_iter is not None and i >= self.n_iter:
-                log.debug("")
-                log.info("Terminating after specified %i total iterations.", i)
-                break
+
+        trainer.train(dataset=dataset)
+        return None
+
+    def _valid_layer(self, layer):
+        layer.monitor.report_epoch()
+        layer.monitor()
+
+        objective = layer.monitor.channels.get('objective', None)
+        if objective:
+            return objective.val_shared.get_value()
+        else:
+            # 'objective' channel is only defined with validation set.
+            return None
diff --git a/sknn/mlp.py b/sknn/mlp.py
index 4b01d65..1e931dd 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -125,20 +125,22 @@ def _train(self, X, y):
         for i in itertools.count(1):
             start = time.time()
 
+            best_train = False
             avg_train_error = self._backend._train_impl(X, y)
-            best_train_error = min(best_train_error, avg_train_error)
-            best_train = bool(avg_train_error / best_train_error < (1.0 + self.f_stable))
-
-            if math.isnan(avg_train_error):
-                raise RuntimeError("Training diverged and returned NaN at batch %i." % batches)
-
+            if avg_train_error is not None:
+                if math.isnan(avg_train_error):
+                    raise RuntimeError("Training diverged and returned NaN at batch %i." % batches)
+
+                best_train_error = min(best_train_error, avg_train_error)
+                best_train = bool(avg_train_error / best_train_error < (1.0 + self.f_stable))
+
+            best_valid = True
+            avg_valid_error = None
             if self.valid_set is not None:
                 avg_valid_error = self._backend._valid_impl(*self.valid_set)
-                best_valid_error = min(best_valid_error, avg_valid_error)
-                best_valid = bool(avg_valid_error / best_valid_error < (1.0 + self.f_stable))
-            else:
-                avg_valid_error = None
-                best_valid = True
+                if avg_valid_error is not None:
+                    best_valid_error = min(best_valid_error, avg_valid_error)
+                    best_valid = bool(avg_valid_error / best_valid_error < (1.0 + self.f_stable))
 
             log.debug("\r{:>5} {}{}{} {}{}{} {:>5.1f}s".format(
                 i,
diff --git a/sknn/tests/test_training.py b/sknn/tests/test_training.py
index ae62242..caf26a7 100644
--- a/sknn/tests/test_training.py
+++ b/sknn/tests/test_training.py
@@ -74,15 +74,17 @@ def test_VerboseRegressor(self):
         nn = MLPR(layers=[L("Linear")], verbose=1, n_iter=1)
         a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,4))
         nn.fit(a_in, a_out)
-        assert_in("Epoch Validation Error Time", self.buf.getvalue())
-        assert_in(" 1 N/A ", self.buf.getvalue())
+        assert_in("Epoch Training Error Validation Error Time", self.buf.getvalue())
+        assert_in(" 1 ", self.buf.getvalue())
+        assert_in(" N/A ", self.buf.getvalue())
 
     def test_VerboseClassifier(self):
         nn = MLPC(layers=[L("Linear")], verbose=1, n_iter=1)
         a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,1), dtype=numpy.int32)
         nn.fit(a_in, a_out)
-        assert_in("Epoch Validation Error Time", self.buf.getvalue())
-        assert_in(" 1 N/A ", self.buf.getvalue())
+        assert_in("Epoch Training Error Validation Error Time", self.buf.getvalue())
+        assert_in(" 1 ", self.buf.getvalue())
+        assert_in(" N/A ", self.buf.getvalue())
 
     def test_CaughtRuntimeError(self):
         nn = MLPC(layers=[L("Linear")], learning_rate=float("nan"), n_iter=1)

From 0d13b164784ae7f685a17c1407b89d09a81eb208 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Tue, 17 Nov 2015 15:17:45 +0100
Subject: [PATCH 14/17] Fix for stability condition for tests to pass as before.

---
 sknn/mlp.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sknn/mlp.py b/sknn/mlp.py
index 1e931dd..c33739a 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -132,15 +132,15 @@ def _train(self, X, y):
                     raise RuntimeError("Training diverged and returned NaN at batch %i." % batches)
 
                 best_train_error = min(best_train_error, avg_train_error)
-                best_train = bool(avg_train_error / best_train_error < (1.0 + self.f_stable))
+                best_train = bool(avg_train_error <= best_train_error * (1.0 + self.f_stable))
 
-            best_valid = True
+            best_valid = False
             avg_valid_error = None
             if self.valid_set is not None:
                 avg_valid_error = self._backend._valid_impl(*self.valid_set)
                 if avg_valid_error is not None:
                     best_valid_error = min(best_valid_error, avg_valid_error)
-                    best_valid = bool(avg_valid_error / best_valid_error < (1.0 + self.f_stable))
+                    best_valid = bool(avg_valid_error <= best_valid_error * (1.0 + self.f_stable))
 
             log.debug("\r{:>5} {}{}{} {}{}{} {:>5.1f}s".format(
                 i,
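Patch 14 changes the stability test from a ratio against the current value to a comparison within an f_stable band of the running best. Concretely (a worked check with made-up numbers; the `<=` form shown here is the one introduced above, later tightened to `<` in patch 16):

    def counts_as_improvement(avg_error, best_error, f_stable=0.01):
        # An epoch resets the stability counter only if its error stays within
        # an f_stable fraction of the best error seen so far.
        return avg_error <= best_error * (1.0 + f_stable)

    assert counts_as_improvement(0.1008, best_error=0.1000)       # within the 1% band
    assert not counts_as_improvement(0.1100, best_error=0.1000)   # 10% worse: a 'stable' epoch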
From e1e9c1a4804c054353ebfc887a4d354d09fb4ae9 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Tue, 17 Nov 2015 15:34:03 +0100
Subject: [PATCH 15/17] All tests pass but coverage is down as PyLearn2 backend is now lacking features.

---
 sknn/tests/test_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sknn/tests/test_types.py b/sknn/tests/test_types.py
index a326c2f..fff99c3 100644
--- a/sknn/tests/test_types.py
+++ b/sknn/tests/test_types.py
@@ -120,7 +120,7 @@ def test_TrainRandomOneEpoch(self):
 
         assert_true(numpy.all(nn1._predict(X_s) == nn1._predict(X_s)))
 
-    def __test_TrainConstantOneEpoch(self):
+    def test_TrainConstantOneEpoch(self):
         for t in ['csr_matrix', 'csc_matrix']:
             sparse_matrix = getattr(scipy.sparse, t)
             X_s, y_s = sparse_matrix((8, 16), dtype=numpy.float32), sparse_matrix((8, 16), dtype=numpy.float32)

From 9e561ffcdf55d60f9d0f837273b2449e25188164 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Tue, 17 Nov 2015 19:56:19 +0100
Subject: [PATCH 16/17] Fix for all outstanding backend tests for Lasagne.

---
 sknn/backend/lasagne/mlp.py   | 28 ++++++++++------------------
 sknn/mlp.py                   |  6 +++---
 sknn/tests/test_classifier.py |  4 ++--
 sknn/tests/test_output.py     |  6 ++++++
 sknn/tests/test_rules.py      |  1 +
 sknn/tests/test_sklearn.py    | 14 ++++++++------
 sknn/tests/test_training.py   |  4 ++--
 7 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py
index 42ba113..00adc71 100644
--- a/sknn/backend/lasagne/mlp.py
+++ b/sknn/backend/lasagne/mlp.py
@@ -51,30 +51,22 @@ def _create_mlp_trainer(self, params):
         assert len(layer_decay) == 0 or self.regularize in ('L1', 'L2', None)
 
         if len(layer_decay) > 0:
-            mlp_default_cost = self.mlp.get_default_cost()
-            if self.regularize == 'L1':
-                raise NotImplementedError
-                """
-                l1 = mlp_cost.L1WeightDecay(layer_decay)
-                self.cost = cost.SumOfCosts([mlp_default_cost,l1])
-                """
-            else: # Default is 'L2'.
-                raise NotImplementedError
-                """
-                if self.regularize is None:
-                    self.regularize = 'L2'
-
-                l2 = mlp_cost.WeightDecay(layer_decay)
-                self.cost = cost.SumOfCosts([mlp_default_cost,l2])
-                """
+            if self.regularize is None:
+                self.regularize = 'L2'
+            penalty = getattr(lasagne.regularization, self.regularize.lower())
+            regularize = lasagne.regularization.apply_penalty
+            self.cost = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty)
+                            for s, l in zip(self.layers, self.mlp))
 
         cost_functions = {'mse': 'squared_error', 'mcc': 'categorical_crossentropy'}
         loss_type = self.loss_type or ('mcc' if self.is_classifier else 'mse')
         assert loss_type in cost_functions,\
             "Loss type `%s` not supported by Lasagne backend." % loss_type
         cost_fn = getattr(lasagne.objectives, cost_functions[loss_type])
-        self.cost = cost_fn(self.symbol_output, self.tensor_output).mean()
-        return self._create_trainer(params, self.cost)
+        cost_eval = cost_fn(self.symbol_output, self.tensor_output).mean()
+        if self.cost is not None:
+            cost_eval = cost_eval * self.cost
+        return self._create_trainer(params, cost_eval)
 
     def _create_trainer(self, params, cost):
         if self.learning_rule in ('sgd', 'adagrad', 'adadelta', 'rmsprop', 'adam'):
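The hunk above replaces the unfinished PyLearn2 weight-decay costs with Lasagne's own regularization helpers. A standalone sketch of combining a data loss with an L2 penalty using those helpers (layer sizes and decay factors are arbitrary, for illustration only):

    import theano.tensor as T
    import lasagne
    from lasagne import layers, objectives, regularization

    X, y = T.matrix('X'), T.matrix('y')
    l_in = layers.InputLayer((None, 16), input_var=X)
    l_hid = layers.DenseLayer(l_in, num_units=8, nonlinearity=lasagne.nonlinearities.rectify)
    l_out = layers.DenseLayer(l_hid, num_units=4, nonlinearity=lasagne.nonlinearities.linear)

    # Mean squared error on the output, plus a small L2 weight-decay term per layer.
    loss = objectives.squared_error(layers.get_output(l_out), y).mean()
    loss += 0.0001 * regularization.regularize_layer_params(l_hid, regularization.l2)
    loss += 0.0001 * regularization.regularize_layer_params(l_out, regularization.l2)

    params = layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.sgd(loss, params, learning_rate=0.01)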
diff --git a/sknn/mlp.py b/sknn/mlp.py
index c33739a..bbe9e66 100644
--- a/sknn/mlp.py
+++ b/sknn/mlp.py
@@ -129,10 +129,10 @@ def _train(self, X, y):
             avg_train_error = self._backend._train_impl(X, y)
             if avg_train_error is not None:
                 if math.isnan(avg_train_error):
-                    raise RuntimeError("Training diverged and returned NaN at batch %i." % batches)
+                    raise RuntimeError("Training diverged and returned NaN.")
 
                 best_train_error = min(best_train_error, avg_train_error)
-                best_train = bool(avg_train_error <= best_train_error * (1.0 + self.f_stable))
+                best_train = bool(avg_train_error < best_train_error * (1.0 + self.f_stable))
 
             best_valid = False
             avg_valid_error = None
@@ -140,7 +140,7 @@ def _train(self, X, y):
                 avg_valid_error = self._backend._valid_impl(*self.valid_set)
                 if avg_valid_error is not None:
                     best_valid_error = min(best_valid_error, avg_valid_error)
-                    best_valid = bool(avg_valid_error <= best_valid_error * (1.0 + self.f_stable))
+                    best_valid = bool(avg_valid_error < best_valid_error * (1.0 + self.f_stable))
 
             log.debug("\r{:>5} {}{}{} {}{}{} {:>5.1f}s".format(
                 i,
diff --git a/sknn/tests/test_classifier.py b/sknn/tests/test_classifier.py
index 71f9ec8..4a95880 100644
--- a/sknn/tests/test_classifier.py
+++ b/sknn/tests/test_classifier.py
@@ -12,7 +12,7 @@ class TestClassifierFunctionality(unittest.TestCase):
 
     def setUp(self):
-        self.nn = MLPC(layers=[L("Linear")], n_iter=1)
+        self.nn = MLPC(layers=[L("Softmax")], n_iter=1)
 
     def test_FitAutoInitialize(self):
         a_in, a_out = numpy.zeros((8,16)), numpy.random.randint(0, 5, (8,))
@@ -69,7 +69,7 @@ def test_CalculateScore(self):
 class TestClassifierClone(TestClassifierFunctionality):
 
     def setUp(self):
-        cc = MLPC(layers=[L("Linear")], n_iter=1)
+        cc = MLPC(layers=[L("Sigmoid")], n_iter=1)
         self.nn = clone(cc)
 
     # This runs the same tests on the clone as for the original above.
diff --git a/sknn/tests/test_output.py b/sknn/tests/test_output.py
index 110f4e7..22fbc3b 100644
--- a/sknn/tests/test_output.py
+++ b/sknn/tests/test_output.py
@@ -28,6 +28,7 @@ class TestLossTypes(unittest.TestCase):
     def test_UnknownLossType(self):
         assert_raises(AssertionError, MLPR, layers=[], loss_type='unknown')
 
+    @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2')
     def test_MeanAverageErrorLinear(self):
         nn = MLPR(layers=[L("Linear")], loss_type='mae', n_iter=1)
         self._run(nn)
@@ -35,6 +36,11 @@ def test_MeanAverageErrorLinear(self):
     def test_MeanSquaredErrorLinear(self):
         nn = MLPR(layers=[L("Linear")], loss_type='mse', n_iter=1)
         self._run(nn)
+
+    @unittest.skipIf(sknn.backend.name != 'lasagne', 'only lasagne')
+    def test_MeanSquaredErrorLinear(self):
+        nn = MLPR(layers=[L("Softmax")], loss_type='mcc', n_iter=1)
+        self._run(nn)
 
     @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2')
     def test_MeanAverageErrorGaussian(self):
diff --git a/sknn/tests/test_rules.py b/sknn/tests/test_rules.py
index 2113e7d..2a3d59e 100644
--- a/sknn/tests/test_rules.py
+++ b/sknn/tests/test_rules.py
@@ -105,6 +105,7 @@ def test_DropoutPerLayer(self):
         self._run(nn)
         assert_in('Using `dropout` for regularization.', self.output.getvalue())
 
+    @unittest.skipIf(sknn.backend.name != 'pylearn2', 'only pylearn2')
    def test_AutomaticDropout(self):
         nn = MLPR(layers=[L("Tanh", units=8, dropout=0.25), L("Linear")], n_iter=1)
         self._run(nn)
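The test updates above consistently pair the output layer with the loss: Softmax outputs with categorical cross-entropy ('mcc') for classifiers, Linear outputs with mean squared error ('mse') for regressors. A usage sketch of those combinations (random data, purely illustrative):

    import numpy
    from sknn.mlp import Classifier, Regressor, Layer

    X = numpy.random.uniform(0.0, 1.0, (64, 16))

    # Classification: Softmax output; 'mcc' is the default loss for classifiers.
    clf = Classifier(layers=[Layer("Softmax")], n_iter=1)
    clf.fit(X, numpy.random.randint(0, 4, (64,)))

    # Regression: Linear output with a mean-squared-error loss.
    reg = Regressor(layers=[Layer("Linear")], loss_type='mse', n_iter=1)
    reg.fit(X, numpy.random.uniform(0.0, 1.0, (64, 4)))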
diff --git a/sknn/tests/test_sklearn.py b/sknn/tests/test_sklearn.py
index 402d908..9fcf105 100644
--- a/sknn/tests/test_sklearn.py
+++ b/sknn/tests/test_sklearn.py
@@ -14,6 +14,7 @@ class TestGridSearchRegressor(unittest.TestCase):
 
     __estimator__ = MLPR
+    __output__ = "Linear"
 
     def setUp(self):
         self.a_in = numpy.random.uniform(0.0, 1.0, (64,16))
@@ -21,33 +22,33 @@ def setUp(self):
 
     def test_GridGlobalParams(self):
         clf = GridSearchCV(
-            self.__estimator__(layers=[L("Linear")], n_iter=1),
+            self.__estimator__(layers=[L(self.__output__)], n_iter=1),
             param_grid={'learning_rate': [0.01, 0.001]})
         clf.fit(self.a_in, self.a_out)
 
     def test_GridLayerParams(self):
         clf = GridSearchCV(
-            self.__estimator__(layers=[L("Rectifier", units=12), L("Linear")], n_iter=1),
+            self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
             param_grid={'hidden0__units': [4, 8, 12]})
         clf.fit(self.a_in, self.a_out)
 
     def test_RandomGlobalParams(self):
         clf = RandomizedSearchCV(
-            self.__estimator__(layers=[L("Softmax")], n_iter=1),
+            self.__estimator__(layers=[L("Sigmoid")], n_iter=1),
             param_distributions={'learning_rate': uniform(0.001, 0.01)},
             n_iter=2)
         clf.fit(self.a_in, self.a_out)
 
     def test_RandomLayerParams(self):
         clf = RandomizedSearchCV(
-            self.__estimator__(layers=[L("Softmax", units=12), L("Linear")], n_iter=1),
+            self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
             param_distributions={'hidden0__units': randint(4, 12)},
             n_iter=2)
         clf.fit(self.a_in, self.a_out)
 
     def test_RandomMultipleJobs(self):
         clf = RandomizedSearchCV(
-            self.__estimator__(layers=[L("Softmax", units=12), L("Linear")], n_iter=1),
+            self.__estimator__(layers=[L("Sigmoid", units=12), L(self.__output__)], n_iter=1),
             param_distributions={'hidden0__units': randint(4, 12)},
             n_iter=4, n_jobs=4)
         clf.fit(self.a_in, self.a_out)
@@ -56,6 +57,7 @@ class TestGridSearchClassifier(TestGridSearchRegressor):
 
     __estimator__ = MLPC
+    __output__ = "Softmax"
 
     def setUp(self):
         self.a_in = numpy.random.uniform(0.0, 1.0, (64,16))
@@ -74,4 +76,4 @@ def test_Classifier(self):
     a_in = numpy.random.uniform(0.0, 1.0, (64,16))
     a_out = numpy.random.randint(0, 4, (64,))
 
-    cross_val_score(MLPC(layers=[L("Linear")], n_iter=1), a_in, a_out, cv=5)
+    cross_val_score(MLPC(layers=[L("Softmax")], n_iter=1), a_in, a_out, cv=5)
diff --git a/sknn/tests/test_training.py b/sknn/tests/test_training.py
index caf26a7..45c87f3 100644
--- a/sknn/tests/test_training.py
+++ b/sknn/tests/test_training.py
@@ -20,7 +20,7 @@ def test_FitTerminateStable(self):
         activation = "Gaussian" if sknn.backend.name == "pylearn2" else "Linear"
         self.nn = MLP(
             layers=[L(activation)], learning_rate=0.001,
-            n_iter=None, n_stable=1, f_stable=0.1,
+            n_iter=None, n_stable=1, f_stable=0.01,
             valid_set=(a_in, a_out))
         self.nn._fit(a_in, a_out)
 
@@ -79,7 +79,7 @@ def test_VerboseRegressor(self):
         assert_in(" N/A ", self.buf.getvalue())
 
     def test_VerboseClassifier(self):
-        nn = MLPC(layers=[L("Linear")], verbose=1, n_iter=1)
+        nn = MLPC(layers=[L("Softmax")], verbose=1, n_iter=1)
         a_in, a_out = numpy.zeros((8,16)), numpy.zeros((8,1), dtype=numpy.int32)
         nn.fit(a_in, a_out)
         assert_in("Epoch Training Error Validation Error Time", self.buf.getvalue())

From b673aefd20b863f755ba046e7ae1f8f3269e2213 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Tue, 17 Nov 2015 20:28:18 +0100
Subject: [PATCH 17/17] Re-enabled one missing test, reworked some code for coverage.

---
 sknn/backend/__init__.py          |  2 +-
 sknn/backend/pylearn2/__init__.py | 12 ++++++++++++
 sknn/backend/pylearn2/dataset.py  | 16 +---------------
 sknn/backend/pylearn2/mlp.py      |  4 ++--
 sknn/backend/pylearn2/nn.py       |  9 +++------
 sknn/tests/test_types.py          |  4 ++--
 6 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/sknn/backend/__init__.py b/sknn/backend/__init__.py
index 1a84f79..300dc0b 100644
--- a/sknn/backend/__init__.py
+++ b/sknn/backend/__init__.py
@@ -21,5 +21,5 @@ def __init__(self, _):
 # Automatically import the recommended backend if none was manually imported.
 def setup():
     if name == None:
-        from . import lasagne
+        from . import pylearn2
     assert name is not None, "No backend for module sknn was imported."
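With PyLearn2 restored as the automatic default, the Lasagne backend becomes opt-in: importing it before building any estimator registers it, which is exactly what the deprecation warning added in the next hunk recommends. For example (a usage sketch only):

    import numpy
    from sknn.backend import lasagne    # registers Lasagne as the MLP backend
    from sknn.mlp import Regressor, Layer

    nn = Regressor(layers=[Layer("Linear")], n_iter=1)
    nn.fit(numpy.zeros((8, 16)), numpy.zeros((8, 4)))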
diff --git a/sknn/backend/pylearn2/__init__.py b/sknn/backend/pylearn2/__init__.py
index cc18126..c707789 100644
--- a/sknn/backend/pylearn2/__init__.py
+++ b/sknn/backend/pylearn2/__init__.py
@@ -1,6 +1,18 @@
 # -*- coding: utf-8 -*-
 from __future__ import (absolute_import, unicode_literals, print_function)
 
+from ...nn import ansi
+
+
+import warnings
+warnings.warn(ansi.YELLOW + """\n
+The PyLearn2 backend is deprecated; the next release will switch to Lasagne by default.
+
+Test the change using the following at the top of your script:
+> from sknn.backend import lasagne
+""" + ansi.ENDC, category=UserWarning)
+
+
 from ... import backend
 from .mlp import MultiLayerPerceptronBackend
 from .ae import AutoEncoderBackend
diff --git a/sknn/backend/pylearn2/dataset.py b/sknn/backend/pylearn2/dataset.py
index d3bbf4c..2c52a2e 100644
--- a/sknn/backend/pylearn2/dataset.py
+++ b/sknn/backend/pylearn2/dataset.py
@@ -103,7 +103,7 @@ def _mutate_fn(self, array):
             array = self._conv_fn(array)
         if self.mutator is not None:
             for i in range(array.shape[0]):
-                self.mutator(array[i])
+                array[i] = self.mutator(array[i])
         return array
 
     @functools.wraps(dataset.Dataset.iterator)
@@ -160,17 +160,3 @@ def iterator(self, **kwargs):
         if self.mutator is not None:
             bit._convert[0] = self._conv_fn
         return bit
-
-"""
-OriginalDatasetIterator = iteration.FiniteDatasetIterator
-
-def create_finite_iterator(*args, **kwargs):
-    print('create_finite_iterator', kwargs['convert'])
-    def conv_fn(x):
-        return x + 0.01
-    kwargs['convert'] = [conv_fn, None]
-    return OriginalDatasetIterator(*args, **kwargs)
-    # convert=convert)
-
-datasets.dense_design_matrix.FiniteDatasetIterator = create_finite_iterator
-"""
\ No newline at end of file
diff --git a/sknn/backend/pylearn2/mlp.py b/sknn/backend/pylearn2/mlp.py
index 3407d2d..18f3b9f 100644
--- a/sknn/backend/pylearn2/mlp.py
+++ b/sknn/backend/pylearn2/mlp.py
@@ -270,10 +270,10 @@ def _train_impl(self, X, y):
             X = self.ds.view_converter.topo_view_to_design_mat(X)
         self.ds.X, self.ds.y = X, y
 
-        self._train_layer(self.trainer, self.mlp, self.ds)
+        return self._train_layer(self.trainer, self.mlp, self.ds)
 
     def _valid_impl(self, X, y):
-        self._valid_layer(self.mlp)
+        return self._valid_layer(self.mlp)
 
     @property
     def is_initialized(self):
diff --git a/sknn/backend/pylearn2/nn.py b/sknn/backend/pylearn2/nn.py
index dd4e9de..2542d5b 100644
--- a/sknn/backend/pylearn2/nn.py
+++ b/sknn/backend/pylearn2/nn.py
@@ -87,10 +87,7 @@ def _train_layer(self, trainer, layer, dataset):
     def _valid_layer(self, layer):
         layer.monitor.report_epoch()
         layer.monitor()
-
+
+        # 'objective' channel is only defined with validation set.
         objective = layer.monitor.channels.get('objective', None)
-        if objective:
-            return objective.val_shared.get_value()
-        else:
-            # 'objective' channel is only defined with validation set.
-            return None
+        return objective.val_shared.get_value() if objective else None
diff --git a/sknn/tests/test_types.py b/sknn/tests/test_types.py
index fff99c3..77da0ea 100644
--- a/sknn/tests/test_types.py
+++ b/sknn/tests/test_types.py
@@ -41,10 +41,10 @@ def test_FitHybrid(self):
         X = numpy.zeros((8, 4), dtype=numpy.float32)
         y = numpy.zeros((8, 4), dtype=numpy.float32)
         self.nn._fit(X, y)
 
-    def __test_FitMutator(self):
+    def test_FitMutator(self):
         def mutate(x):
-            x -= 0.5
             self.count += 1
+            return x - 0.5
 
         self.nn.mutator = mutate
         for t in SPARSE_TYPES:
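After the dataset fix above, a mutator is expected to return the transformed sample rather than edit it in place, and the re-enabled test now follows that convention. A mutator written for the new behaviour might look like this (the jitter amount is arbitrary, for illustration only):

    import numpy

    def add_jitter(sample):
        # Return a new array; purely in-place modifications are no longer
        # picked up by the PyLearn2 dataset wrapper after this change.
        return sample + numpy.random.uniform(-0.05, 0.05, sample.shape)

    # nn.mutator = add_jitter   # attach before calling nn.fit(...), as in the test above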