Commit 666a68e
Merge pull request #135 from aigamedev/cost
Dataset Masking: Per-Sample Training Weight
alexjc committed Nov 22, 2015 (2 parents: e1c4f4b + 8593537)
Showing 3 changed files with 128 additions and 35 deletions.
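
In short, fit() gains an optional per-sample weight argument that scales each example's contribution to the training cost (a weight of 0.0 masks a sample out entirely). A minimal usage sketch based on the signatures and tests changed below; the data and layer choice are illustrative:

    import numpy as np
    from sknn.mlp import Regressor, Layer

    X = np.random.uniform(-1.0, +1.0, (8, 16))
    y = np.random.uniform(-1.0, +1.0, (8, 1))
    w = np.array([1.0] * 4 + [0.0] * 4)   # per-sample weights: the last four samples are masked out

    nn = Regressor(layers=[Layer("Linear")], n_iter=50)
    nn.fit(X, y, w)                       # the optional third argument is what this change adds
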
sknn/backend/lasagne/mlp.py (30 additions, 22 deletions)
@@ -40,7 +40,7 @@ def __init__(self, spec):
self.f = None
self.trainer = None
self.validator = None
self.cost = None
self.regularizer = None

def _create_mlp_trainer(self, params):
# Aggregate all regularization parameters into common dictionaries.
@@ -56,17 +56,19 @@ def _create_mlp_trainer(self, params):
self.regularize = 'L2'
penalty = getattr(lasagne.regularization, self.regularize.lower())
regularize = lasagne.regularization.apply_penalty
self.cost = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty)
for s, l in zip(self.layers, self.mlp))
self.regularizer = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty)
for s, l in zip(self.layers, self.mlp))

cost_functions = {'mse': 'squared_error', 'mcc': 'categorical_crossentropy'}
loss_type = self.loss_type or ('mcc' if self.is_classifier else 'mse')
assert loss_type in cost_functions,\
"Loss type `%s` not supported by Lasagne backend." % loss_type
self.cost_function = getattr(lasagne.objectives, cost_functions[loss_type])
cost_symbol = self.cost_function(self.network_output, self.data_output).mean()
if self.cost is not None:
cost_symbol = cost_symbol + self.cost
cost_symbol = self.cost_function(self.network_output, self.data_output)
cost_symbol = lasagne.objectives.aggregate(cost_symbol.T, self.data_mask, mode='mean')

if self.regularizer is not None:
cost_symbol = cost_symbol + self.regularizer
return self._create_trainer(params, cost_symbol)
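
For reference, lasagne.objectives.aggregate(loss, weights, mode='mean') scales the per-element loss by the weights before averaging, so the masked cost built above behaves roughly like the following numpy sketch (the function name is illustrative):

    import numpy as np

    def masked_mse(outputs, targets, sample_weights):
        # elementwise squared error, shape (n_samples, n_outputs)
        err = (outputs - targets) ** 2
        # transposing lets the per-sample weight vector broadcast across outputs,
        # mirroring aggregate(cost_symbol.T, self.data_mask, mode='mean') above
        return (err.T * sample_weights).mean()
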

def _create_trainer(self, params, cost):
@@ -81,8 +83,9 @@ def _create_trainer(self, params, cost):
raise NotImplementedError(
"Learning rule type `%s` is not supported." % self.learning_rule)

trainer = theano.function([self.data_input, self.data_output], cost,
trainer = theano.function([self.data_input, self.data_output, self.data_mask], cost,
updates=self._learning_rule,
on_unused_input='ignore',
allow_input_downcast=True)

compare = self.cost_function(self.network_output, self.data_correct).mean()
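
The extra on_unused_input='ignore' flag keeps Theano from rejecting the compiled function if the mask placeholder does not end up referenced in the cost graph. A minimal Theano sketch of what the flag permits (variable names are illustrative):

    import theano
    import theano.tensor as T

    X, y, m = T.matrix('X'), T.matrix('y'), T.vector('m')
    cost = ((X.sum(axis=1, keepdims=True) - y) ** 2).mean()   # m is not used here

    # with the default on_unused_input='raise', Theano would raise UnusedInputError for m
    f = theano.function([X, y, m], cost,
                        on_unused_input='ignore',
                        allow_input_downcast=True)
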
@@ -135,9 +138,10 @@ def _create_layer(self, name, layer, network):
num_units=layer.units,
nonlinearity=self._get_activation(layer))

def _create_mlp(self, X):
def _create_mlp(self, X, w=None):
self.data_input = T.tensor4('X') if self.is_convolution else T.matrix('X')
self.data_output = T.matrix('y')
self.data_mask = T.vector('m') if w is not None else T.scalar('m')
self.data_correct = T.matrix('yp')

lasagne.random.get_rng().seed(self.random_state)
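
The symbolic mask is a vector with one entry per sample when weights are supplied, and a scalar placeholder otherwise, so the compiled function takes the same list of inputs in both cases. The corresponding numpy shapes would be (sizes illustrative):

    import numpy as np

    X = np.zeros((128, 16), dtype=np.float32)   # (n_samples, n_features)
    y = np.zeros((128, 1), dtype=np.float32)    # (n_samples, n_outputs)
    w = np.ones((128,), dtype=np.float32)       # one weight per sample, matching T.vector('m')
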
@@ -183,12 +187,12 @@ def _create_mlp(self, X):
self.network_output = lasagne.layers.get_output(network, deterministic=True)
self.f = theano.function([self.data_input], self.network_output, allow_input_downcast=True)

def _initialize_impl(self, X, y=None):
def _initialize_impl(self, X, y=None, w=None):
if self.is_convolution:
X = numpy.transpose(X, (0, 3, 1, 2))

if self.mlp is None:
self._create_mlp(X)
self._create_mlp(X, w)

# Can do partial initialization when predicting, no trainer needed.
if y is None:
@@ -220,7 +224,7 @@ def _predict_impl(self, X):
X = numpy.transpose(X, (0, 3, 1, 2))
return self.f(X)

def _iterate_data(self, X, y, batch_size, shuffle=False):
def _iterate_data(self, batch_size, X, y, w, shuffle=False):
def cast(array):
if type(array) != numpy.ndarray:
array = array.todense()
@@ -233,22 +237,26 @@ def cast(array):

for start_idx in range(0, total_size - batch_size + 1, batch_size):
excerpt = indices[start_idx:start_idx + batch_size]
Xb, yb = cast(X[excerpt]), cast(y[excerpt])

yield Xb, yb
Xb, yb, wb = cast(X[excerpt]), cast(y[excerpt]), None
if w is not None:
wb = cast(w[excerpt])
yield Xb, yb, wb

def _print(self, text):
if self.verbose:
sys.stdout.write(text)
sys.stdout.flush()

def _batch_impl(self, X, y, processor, mode, output, shuffle):
def _batch_impl(self, X, y, w, processor, mode, output, shuffle):
progress, batches = 0, X.shape[0] / self.batch_size
loss, count = 0.0, 0
for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle):
for Xb, yb, wb in self._iterate_data(self.batch_size, X, y, w, shuffle):
self._do_callback('on_batch_start', locals())

loss += processor(Xb, yb)

if mode == 'train':
loss += processor(Xb, yb, wb if wb is not None else 1.0)
else:
loss += processor(Xb, yb)
count += 1

while count / batches > progress / 60:
@@ -260,11 +268,11 @@ def _batch_impl(self, X, y, processor, mode, output, shuffle):
self._print('\r')
return loss / count
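
Only the training pass forwards the weights; when none were given, the trainer still receives a constant 1.0, and under mean-mode aggregation a constant weight of 1.0 reproduces the plain unweighted cost:

    import numpy as np

    err = np.random.rand(8, 4)                  # per-element loss for one batch
    assert np.isclose((err * 1.0).mean(), err.mean())
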

def _train_impl(self, X, y):
return self._batch_impl(X, y, self.trainer, mode='train', output='.', shuffle=True)
def _train_impl(self, X, y, w=None):
return self._batch_impl(X, y, w, self.trainer, mode='train', output='.', shuffle=True)

def _valid_impl(self, X, y):
return self._batch_impl(X, y, self.validator, mode='valid', output=' ', shuffle=False)
def _valid_impl(self, X, y, w=None):
return self._batch_impl(X, y, w, self.validator, mode='valid', output=' ', shuffle=False)

@property
def is_initialized(self):
sknn/mlp.py (11 additions, 11 deletions)
@@ -31,14 +31,14 @@ class MultiLayerPerceptron(NeuralNetwork, sklearn.base.BaseEstimator):
def _setup(self):
pass

def _initialize(self, X, y=None):
def _initialize(self, X, y=None, w=None):
assert not self.is_initialized,\
"This neural network has already been initialized."
self._create_specs(X, y)

backend.setup()
self._backend = backend.MultiLayerPerceptronBackend(self)
return self._backend._initialize_impl(X, y)
return self._backend._initialize_impl(X, y, w)

def _check_layer(self, layer, required, optional=[]):
required.extend(['name', 'type'])
@@ -129,7 +129,7 @@ def _do_callback(self, event, variables):
else:
return self.callback(event, **variables)

def _train(self, X, y):
def _train(self, X, y, w=None):
assert self.n_iter or self.n_stable,\
"Neither n_iter nor n_stable were specified; training would loop forever."

@@ -143,7 +143,7 @@ def _train(self, X, y):
self._do_callback('on_epoch_start', locals())

is_best_train = False
avg_train_error = self._backend._train_impl(X, y)
avg_train_error = self._backend._train_impl(X, y, w)
if avg_train_error is not None:
if math.isnan(avg_train_error):
raise RuntimeError("Training diverged and returned NaN.")
@@ -196,14 +196,14 @@ def _train(self, X, y):
self._do_callback('on_train_finish', locals())
self._backend._array_to_mlp(best_params, self._backend.mlp)

def _fit(self, X, y):
def _fit(self, X, y, w=None):
assert X.shape[0] == y.shape[0],\
"Expecting same number of input and output samples."
data_shape, data_size = X.shape, X.size+y.size
X, y = self._reshape(X, y)

if not self.is_initialized:
X, y = self._initialize(X, y)
X, y = self._initialize(X, y, w)

log.info("Training on dataset of {:,} samples with {:,} total size.".format(data_shape[0], data_size))
if data_shape[1:] != X.shape[1:]:
@@ -224,7 +224,7 @@ def _fit(self, X, y):
"\n------------------------------------------------------------")

try:
self._train(X, y)
self._train(X, y, w)
except RuntimeError as e:
log.error("\n{}{}{}\n\n{}\n".format(
ansi.RED,
@@ -262,7 +262,7 @@ class Regressor(MultiLayerPerceptron, sklearn.base.RegressorMixin):
# Regressor compatible with sklearn that wraps various NN implementations.
# The constructor and bulk of documentation is inherited from MultiLayerPerceptron.

def fit(self, X, y):
def fit(self, X, y, w=None):
"""Fit the neural network to the given continuous data as a regression problem.
Parameters
@@ -283,7 +283,7 @@ def fit(self, X, y):
if self.valid_set is not None:
self.valid_set = self._reshape(*self.valid_set)

return super(Regressor, self)._fit(X, y)
return super(Regressor, self)._fit(X, y, w)

def predict(self, X):
"""Calculate predictions for specified inputs.
@@ -322,7 +322,7 @@ def _setup(self):
import sklearn.preprocessing.label as spl
spl.type_of_target = lambda _: "multiclass"

def fit(self, X, y):
def fit(self, X, y, w=None):
"""Fit the neural network to symbolic labels as a classification problem.
Parameters
@@ -369,7 +369,7 @@ def fit(self, X, y):
self.valid_set = (X_v, y_vp)

# Now train based on a problem transformed into regression.
return super(Classifier, self)._fit(X, yp)
return super(Classifier, self)._fit(X, yp, w)
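
A sketch of the same mechanism on the classification side, for example to up-weight a rare label; the data, layer choice, and 9:1 weighting are illustrative:

    import numpy as np
    from sknn.mlp import Classifier, Layer

    X = np.random.uniform(-1.0, +1.0, (200, 8))
    y = (np.random.rand(200, 1) < 0.1).astype(np.int32)   # rare positive class
    w = np.where(y[:, 0] == 1, 9.0, 1.0)                   # per-sample weights favour the rare label

    clf = Classifier(layers=[Layer("Softmax")], n_iter=25)
    clf.fit(X, y, w)
    proba = clf.predict_proba(X)
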

def partial_fit(self, X, y, classes=None):
if y.ndim == 1:
sknn/tests/test_data.py (87 additions, 2 deletions)
@@ -1,10 +1,11 @@
import random
import unittest
from nose.tools import (assert_in, assert_raises, assert_equals, assert_true)
from nose.tools import (assert_greater, assert_less, assert_raises, assert_equals, assert_true)

import logging

import numpy
from sknn.mlp import Regressor as MLPR
from sknn.mlp import Regressor as MLPR, Classifier as MLPC
from sknn.mlp import Layer as L, Convolution as C


@@ -88,3 +89,87 @@ def test_SetLayerParamsDict(self):
p = nn.get_parameters()
assert_true((p[1].weights.astype('float32') == weights.astype('float32')).all())
assert_true((p[1].biases.astype('float32') == biases.astype('float32')).all())


class TestMaskedDataRegression(unittest.TestCase):

def check(self, a_in, a_out, a_mask):
nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50)
nn.fit(a_in, a_out, a_mask)
v_out = nn.predict(a_in)

# Make sure the examples weighted 1.0 have low error, 0.0 high error.
print(abs(a_out - v_out).T * a_mask)
assert_true((abs(a_out - v_out).T * a_mask < 5E-2).all())
assert_true((abs(a_out - v_out).T * (1.0 - a_mask) > 5E-1).any())

def test_SingleOutputOne(self):
a_in = numpy.random.uniform(-1.0, +1.0, (8,16))
a_out = numpy.random.randint(2, size=(8,1)).astype(numpy.float32)
a_mask = (0.0 + a_out).flatten()

self.check(a_in, a_out, a_mask)

def test_SingleOutputZero(self):
a_in = numpy.random.uniform(-1.0, +1.0, (8,16))
a_out = numpy.random.randint(2, size=(8,1)).astype(numpy.float32)
a_mask = (1.0 - a_out).flatten()

self.check(a_in, a_out, a_mask)

def test_SingleOutputNegative(self):
a_in = numpy.random.uniform(-1.0, +1.0, (8,16))
a_out = numpy.random.randint(2, size=(8,1)).astype(numpy.float32)
a_mask = (0.0 + a_out).flatten()
a_out = -1.0 * 2.0 + a_out

self.check(a_in, a_out, a_mask)

def test_MultipleOutputRandom(self):
a_in = numpy.random.uniform(-1.0, +1.0, (8,16))
a_out = numpy.random.randint(2, size=(8,4)).astype(numpy.float32)
a_mask = numpy.random.randint(2, size=(8,)).astype(numpy.float32)

self.check(a_in, a_out, a_mask)
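
The intuition these regression tests encode: samples with weight 0.0 drop out of the weighted objective, so only the weight-1.0 rows constrain the fit. A small closed-form analogue using ordinary weighted least squares (numpy only, purely illustrative):

    import numpy as np

    X = np.random.uniform(-1.0, +1.0, (8, 3))
    beta_true = np.array([1.0, -2.0, 0.5])
    y = X.dot(beta_true)
    w = np.array([1.0] * 4 + [0.0] * 4)

    # weighted normal equations: zero-weight rows contribute nothing
    Xw = X * w[:, None]
    beta = np.linalg.solve(Xw.T.dot(X), Xw.T.dot(y))
    assert np.allclose(beta, beta_true)
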


class TestMaskedDataClassification(unittest.TestCase):

def check(self, a_in, a_out, a_mask, act='Softmax'):
nn = MLPC(layers=[L(act)], learning_rule='rmsprop', n_iter=100)
nn.fit(a_in, a_out, a_mask)
print(nn.classes_)
return nn.predict_proba(a_in)

def test_TwoLabelsOne(self):
# Only one sample has the value 1 with weight 1.0, but all 0s are weighted 0.0.
a_in = numpy.random.uniform(-1.0, +1.0, (16,4))
a_out = numpy.zeros((16,1), dtype=numpy.int32)
a_out[0] = 1
a_mask = (0.0 + a_out).flatten()

a_test = self.check(a_in, a_out, a_mask).mean(axis=0)
assert_greater(a_test[1], a_test[0] * 1.5)

def test_TwoLabelsZero(self):
# Only one sample has the value 0 with weight 1.0, but all 1s are weighted 0.0.
a_in = numpy.random.uniform(-1.0, +1.0, (16,4))
a_out = numpy.ones((16,1), dtype=numpy.int32)
a_out[-1] = 0
a_mask = (1.0 - a_out).flatten()

a_test = self.check(a_in, a_out, a_mask).mean(axis=0)
assert_greater(a_test[0], a_test[1] * 1.5)

def test_FourLabels(self):
# Only one multi-label sample has weight 1.0; the others have weight 0.0. Check probabilities!
chosen = random.randint(0, 15)  # randint is inclusive, so keep the index within the 16 samples
a_in = numpy.random.uniform(-1.0, +1.0, (16,4))
a_out = numpy.random.randint(2, size=(16,4))
a_mask = numpy.zeros((16,), dtype=numpy.int32)
a_mask[chosen] = 1.0

a_test = self.check(a_in, a_out, a_mask, act="Sigmoid").mean(axis=0)
for i in range(a_out.shape[1]):
compare = assert_greater if a_out[chosen][i]==0 else assert_less
compare(a_test[i*2], a_test[i*2+1])
