From 5ae453e99bdd27a6bd9390d57cb418f2a8360f15 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 22 Nov 2015 08:12:30 +0100 Subject: [PATCH 1/6] Another experiment with per-sample training costs, but this doesn't seem to work on MNIST. All or nothing. --- sknn/backend/lasagne/mlp.py | 48 ++++++++++++++++++++++--------------- sknn/mlp.py | 16 ++++++------- 2 files changed, 37 insertions(+), 27 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index c9b801d..dc69be6 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -40,7 +40,7 @@ def __init__(self, spec): self.f = None self.trainer = None self.validator = None - self.cost = None + self.regularizer = None def _create_mlp_trainer(self, params): # Aggregate all regularization parameters into common dictionaries. @@ -56,17 +56,21 @@ def _create_mlp_trainer(self, params): self.regularize = 'L2' penalty = getattr(lasagne.regularization, self.regularize.lower()) regularize = lasagne.regularization.apply_penalty - self.cost = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty) - for s, l in zip(self.layers, self.mlp)) + self.regularizer = sum(layer_decay[s.name] * regularize(l.get_params(tags={'regularizable': True}), penalty) + for s, l in zip(self.layers, self.mlp)) cost_functions = {'mse': 'squared_error', 'mcc': 'categorical_crossentropy'} loss_type = self.loss_type or ('mcc' if self.is_classifier else 'mse') assert loss_type in cost_functions,\ "Loss type `%s` not supported by Lasagne backend." % loss_type self.cost_function = getattr(lasagne.objectives, cost_functions[loss_type]) - cost_symbol = self.cost_function(self.network_output, self.data_output).mean() - if self.cost is not None: - cost_symbol = cost_symbol + self.cost + cost_symbol = self.cost_function(self.network_output, self.data_output) + if True: + cost_symbol = self.data_weight.T * cost_symbol + + cost_symbol = cost_symbol.mean() + if self.regularizer is not None: + cost_symbol = cost_symbol + self.regularizer return self._create_trainer(params, cost_symbol) def _create_trainer(self, params, cost): @@ -81,8 +85,9 @@ def _create_trainer(self, params, cost): raise NotImplementedError( "Learning rule type `%s` is not supported." 
% self.learning_rule) - trainer = theano.function([self.data_input, self.data_output], cost, + trainer = theano.function([self.data_input, self.data_output, self.data_weight], cost, updates=self._learning_rule, + on_unused_input='ignore', allow_input_downcast=True) compare = self.cost_function(self.network_output, self.data_correct).mean() @@ -138,6 +143,7 @@ def _create_layer(self, name, layer, network): def _create_mlp(self, X): self.data_input = T.tensor4('X') if self.is_convolution else T.matrix('X') self.data_output = T.matrix('y') + self.data_weight = T.matrix('s') self.data_correct = T.matrix('yp') lasagne.random.get_rng().seed(self.random_state) @@ -220,7 +226,7 @@ def _predict_impl(self, X): X = numpy.transpose(X, (0, 3, 1, 2)) return self.f(X) - def _iterate_data(self, X, y, batch_size, shuffle=False): + def _iterate_data(self, batch_size, X, y, w, shuffle=False): def cast(array): if type(array) != numpy.ndarray: array = array.todense() @@ -233,22 +239,26 @@ def cast(array): for start_idx in range(0, total_size - batch_size + 1, batch_size): excerpt = indices[start_idx:start_idx + batch_size] - Xb, yb = cast(X[excerpt]), cast(y[excerpt]) - - yield Xb, yb + Xb, yb, wb = cast(X[excerpt]), cast(y[excerpt]), None + if w is not None: + wb = cast(w[excerpt]) + yield Xb, yb, wb def _print(self, text): if self.verbose: sys.stdout.write(text) sys.stdout.flush() - def _batch_impl(self, X, y, processor, mode, output, shuffle): + def _batch_impl(self, X, y, w, processor, mode, output, shuffle): progress, batches = 0, X.shape[0] / self.batch_size loss, count = 0.0, 0 - for Xb, yb in self._iterate_data(X, y, self.batch_size, shuffle): + for Xb, yb, wb in self._iterate_data(self.batch_size, X, y, w, shuffle): self._do_callback('on_batch_start', locals()) - - loss += processor(Xb, yb) + + if mode == 'train': + loss += processor(Xb, yb, wb) + else: + loss += processor(Xb, yb) count += 1 while count / batches > progress / 60: @@ -260,11 +270,11 @@ def _batch_impl(self, X, y, processor, mode, output, shuffle): self._print('\r') return loss / count - def _train_impl(self, X, y): - return self._batch_impl(X, y, self.trainer, mode='train', output='.', shuffle=True) + def _train_impl(self, X, y, w=None): + return self._batch_impl(X, y, w, self.trainer, mode='train', output='.', shuffle=True) - def _valid_impl(self, X, y): - return self._batch_impl(X, y, self.validator, mode='valid', output=' ', shuffle=False) + def _valid_impl(self, X, y, w=None): + return self._batch_impl(X, y, w, self.validator, mode='valid', output=' ', shuffle=False) @property def is_initialized(self): diff --git a/sknn/mlp.py b/sknn/mlp.py index 64da4c8..90505cb 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -129,7 +129,7 @@ def _do_callback(self, event, variables): else: return self.callback(event, **variables) - def _train(self, X, y): + def _train(self, X, y, w=None): assert self.n_iter or self.n_stable,\ "Neither n_iter nor n_stable were specified; training would loop forever." 
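The weight argument threads straight through the stack: fit(X, y, w) hands the optional per-sample weights to _train, which forwards them to the backend's _train_impl, and each mini-batch slice is finally fed to the compiled Theano trainer alongside the inputs and targets. Conceptually, the trainer minimises a weighted mean of the per-sample loss; a minimal numpy sketch of that quantity, for illustration only (the patch builds the equivalent expression symbolically):

    import numpy

    def weighted_mse(y_true, y_pred, w):
        # Squared error per sample, averaged over outputs; shape (n_samples,).
        per_sample = numpy.mean((y_true - y_pred) ** 2, axis=1)
        # Scale each sample by its weight before averaging, so samples with
        # w == 0.0 contribute nothing to the training cost.
        return numpy.mean(per_sample * w)
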
@@ -143,7 +143,7 @@ def _train(self, X, y): self._do_callback('on_epoch_start', locals()) is_best_train = False - avg_train_error = self._backend._train_impl(X, y) + avg_train_error = self._backend._train_impl(X, y, w) if avg_train_error is not None: if math.isnan(avg_train_error): raise RuntimeError("Training diverged and returned NaN.") @@ -196,7 +196,7 @@ def _train(self, X, y): self._do_callback('on_train_finish', locals()) self._backend._array_to_mlp(best_params, self._backend.mlp) - def _fit(self, X, y): + def _fit(self, X, y, w=None): assert X.shape[0] == y.shape[0],\ "Expecting same number of input and output samples." data_shape, data_size = X.shape, X.size+y.size @@ -224,7 +224,7 @@ def _fit(self, X, y): "\n------------------------------------------------------------") try: - self._train(X, y) + self._train(X, y, w) except RuntimeError as e: log.error("\n{}{}{}\n\n{}\n".format( ansi.RED, @@ -262,7 +262,7 @@ class Regressor(MultiLayerPerceptron, sklearn.base.RegressorMixin): # Regressor compatible with sklearn that wraps various NN implementations. # The constructor and bulk of documentation is inherited from MultiLayerPerceptron. - def fit(self, X, y): + def fit(self, X, y, w=None): """Fit the neural network to the given continuous data as a regression problem. Parameters @@ -283,7 +283,7 @@ def fit(self, X, y): if self.valid_set is not None: self.valid_set = self._reshape(*self.valid_set) - return super(Regressor, self)._fit(X, y) + return super(Regressor, self)._fit(X, y, w) def predict(self, X): """Calculate predictions for specified inputs. @@ -322,7 +322,7 @@ def _setup(self): import sklearn.preprocessing.label as spl spl.type_of_target = lambda _: "multiclass" - def fit(self, X, y): + def fit(self, X, y, w=None): """Fit the neural network to symbolic labels as a classification problem. Parameters @@ -369,7 +369,7 @@ def fit(self, X, y): self.valid_set = (X_v, y_vp) # Now train based on a problem transformed into regression. - return super(Classifier, self)._fit(X, yp) + return super(Classifier, self)._fit(X, yp, w) def partial_fit(self, X, y, classes=None): if y.ndim == 1: From b674885048a1463645149d83130af9baf8281f54 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 22 Nov 2015 13:17:53 +0100 Subject: [PATCH 2/6] Tests for weighted examples using regressor, that works quite well apparently! --- sknn/backend/lasagne/mlp.py | 8 ++--- sknn/tests/test_data.py | 61 +++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index dc69be6..3326f44 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -65,10 +65,8 @@ def _create_mlp_trainer(self, params): "Loss type `%s` not supported by Lasagne backend." % loss_type self.cost_function = getattr(lasagne.objectives, cost_functions[loss_type]) cost_symbol = self.cost_function(self.network_output, self.data_output) - if True: - cost_symbol = self.data_weight.T * cost_symbol + cost_symbol = lasagne.objectives.aggregate(cost_symbol.T, self.data_mask, mode='mean') - cost_symbol = cost_symbol.mean() if self.regularizer is not None: cost_symbol = cost_symbol + self.regularizer return self._create_trainer(params, cost_symbol) @@ -85,7 +83,7 @@ def _create_trainer(self, params, cost): raise NotImplementedError( "Learning rule type `%s` is not supported." 
% self.learning_rule) - trainer = theano.function([self.data_input, self.data_output, self.data_weight], cost, + trainer = theano.function([self.data_input, self.data_output, self.data_mask], cost, updates=self._learning_rule, on_unused_input='ignore', allow_input_downcast=True) @@ -143,7 +141,7 @@ def _create_layer(self, name, layer, network): def _create_mlp(self, X): self.data_input = T.tensor4('X') if self.is_convolution else T.matrix('X') self.data_output = T.matrix('y') - self.data_weight = T.matrix('s') + self.data_mask = T.vector('m') self.data_correct = T.matrix('yp') lasagne.random.get_rng().seed(self.random_state) diff --git a/sknn/tests/test_data.py b/sknn/tests/test_data.py index defa4fe..ed1d5b2 100644 --- a/sknn/tests/test_data.py +++ b/sknn/tests/test_data.py @@ -1,3 +1,4 @@ +import random import unittest from nose.tools import (assert_in, assert_raises, assert_equals, assert_true) @@ -88,3 +89,63 @@ def test_SetLayerParamsDict(self): p = nn.get_parameters() assert_true((p[1].weights.astype('float32') == weights.astype('float32')).all()) assert_true((p[1].biases.astype('float32') == biases.astype('float32')).all()) + + +class TestMaskedDataset(unittest.TestCase): + + def test_SingleOutputOne(self): + nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) + a_in, a_out, a_mask = numpy.random.uniform(-1.0, +1.0, (8,16)), numpy.zeros((8,1)), numpy.ones((8,)) + for i in range(8): + if random.choice([True, False]): + a_out[i] = 1.0 + a_mask[i] = 1.0 + else: + a_out[i] = 0.0 + a_mask[i] = 0.0 + + nn.fit(a_in, a_out, a_mask) + v_out = nn.predict(a_in) + + # Make sure the examples weighted 1.0 have low error, 0.0 high error. + print(abs(a_out - v_out).T[0] * (1.0 - a_mask)) + assert_true((abs(a_out - v_out).T[0] * a_mask < 5E-2).all()) + assert_true((abs(a_out - v_out).T[0] * (1.0 - a_mask) > 5E-1).any()) + + def test_SingleOutputZero(self): + nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) + a_in, a_out, a_mask = numpy.random.uniform(-1.0, +1.0, (8,16)), numpy.zeros((8,1)), numpy.ones((8,)) + for i in range(8): + if random.choice([True, False]): + a_out[i] = 1.0 + a_mask[i] = 0.0 + else: + a_out[i] = 0.0 + a_mask[i] = 1.0 + + nn.fit(a_in, a_out, a_mask) + v_out = nn.predict(a_in) + + # Make sure the examples weighted 1.0 have low error, 0.0 high error. + print(abs(a_out - v_out).T[0] * (1.0 - a_mask)) + assert_true((abs(a_out - v_out).T[0] * a_mask < 5E-2).all()) + assert_true((abs(a_out - v_out).T[0] * (1.0 - a_mask) > 5E-1).any()) + + def test_SingleOutputNegative(self): + nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) + a_in, a_out, a_mask = numpy.random.uniform(-1.0, +1.0, (8,16)), numpy.zeros((8,1)), numpy.ones((8,)) + for i in range(8): + if random.choice([True, False]): + a_out[i] = -1.0 + a_mask[i] = 1.0 + else: + a_out[i] = 0.0 + a_mask[i] = 0.0 + + nn.fit(a_in, a_out, a_mask) + v_out = nn.predict(a_in) + + # Make sure the examples weighted 1.0 have low error, 0.0 high error. + print(abs(a_out - v_out).T[0] * (1.0 - a_mask)) + assert_true((abs(a_out - v_out).T[0] * (1.0 - a_mask) > 5E-1).any()) + assert_true((abs(a_out - v_out).T[0] * a_mask < 5E-2).all()) \ No newline at end of file From a5e2b10045cc6d75f1c823593e53c2ee6f136a54 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 22 Nov 2015 15:18:31 +0100 Subject: [PATCH 3/6] Improved the test quality and added multi-output regressor tests. 
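The regression tests above all exercise the weighted cost introduced in the previous patch: the element-wise loss is transposed so the (batch,) mask broadcasts across it, and lasagne.objectives.aggregate then takes the weighted mean. A standalone sketch of that construction, with variable names chosen here purely for illustration:

    import theano
    import theano.tensor as T
    import lasagne

    y = T.matrix('y')      # targets, shape (batch, n_outputs)
    p = T.matrix('p')      # network output, same shape
    m = T.vector('m')      # per-sample weight/mask, shape (batch,)

    # Element-wise squared error; transposing to (n_outputs, batch) lets the
    # (batch,) mask broadcast so every output error of sample j is scaled by m[j].
    loss = lasagne.objectives.squared_error(p, y)
    cost = lasagne.objectives.aggregate(loss.T, m, mode='mean')

    f = theano.function([p, y, m], cost, allow_input_downcast=True)
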
--- sknn/tests/test_data.py | 74 ++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 46 deletions(-) diff --git a/sknn/tests/test_data.py b/sknn/tests/test_data.py index ed1d5b2..f6ef02b 100644 --- a/sknn/tests/test_data.py +++ b/sknn/tests/test_data.py @@ -93,59 +93,41 @@ def test_SetLayerParamsDict(self): class TestMaskedDataset(unittest.TestCase): - def test_SingleOutputOne(self): + def check(self, a_in, a_out, a_mask): nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) - a_in, a_out, a_mask = numpy.random.uniform(-1.0, +1.0, (8,16)), numpy.zeros((8,1)), numpy.ones((8,)) - for i in range(8): - if random.choice([True, False]): - a_out[i] = 1.0 - a_mask[i] = 1.0 - else: - a_out[i] = 0.0 - a_mask[i] = 0.0 - nn.fit(a_in, a_out, a_mask) v_out = nn.predict(a_in) - + # Make sure the examples weighted 1.0 have low error, 0.0 high error. - print(abs(a_out - v_out).T[0] * (1.0 - a_mask)) - assert_true((abs(a_out - v_out).T[0] * a_mask < 5E-2).all()) - assert_true((abs(a_out - v_out).T[0] * (1.0 - a_mask) > 5E-1).any()) + print(abs(a_out - v_out).T * a_mask) + assert_true((abs(a_out - v_out).T * a_mask < 5E-2).all()) + assert_true((abs(a_out - v_out).T * (1.0 - a_mask) > 5E-1).any()) + + def test_SingleOutputOne(self): + a_in = numpy.random.uniform(-1.0, +1.0, (8,16)) + a_out = numpy.random.randint(2, size=(8,1)).astype(numpy.float32) + a_mask = (0.0 + a_out).flatten() + + self.check(a_in, a_out, a_mask) def test_SingleOutputZero(self): - nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) - a_in, a_out, a_mask = numpy.random.uniform(-1.0, +1.0, (8,16)), numpy.zeros((8,1)), numpy.ones((8,)) - for i in range(8): - if random.choice([True, False]): - a_out[i] = 1.0 - a_mask[i] = 0.0 - else: - a_out[i] = 0.0 - a_mask[i] = 1.0 + a_in = numpy.random.uniform(-1.0, +1.0, (8,16)) + a_out = numpy.random.randint(2, size=(8,1)).astype(numpy.float32) + a_mask = (1.0 - a_out).flatten() - nn.fit(a_in, a_out, a_mask) - v_out = nn.predict(a_in) - - # Make sure the examples weighted 1.0 have low error, 0.0 high error. - print(abs(a_out - v_out).T[0] * (1.0 - a_mask)) - assert_true((abs(a_out - v_out).T[0] * a_mask < 5E-2).all()) - assert_true((abs(a_out - v_out).T[0] * (1.0 - a_mask) > 5E-1).any()) + self.check(a_in, a_out, a_mask) def test_SingleOutputNegative(self): - nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) - a_in, a_out, a_mask = numpy.random.uniform(-1.0, +1.0, (8,16)), numpy.zeros((8,1)), numpy.ones((8,)) - for i in range(8): - if random.choice([True, False]): - a_out[i] = -1.0 - a_mask[i] = 1.0 - else: - a_out[i] = 0.0 - a_mask[i] = 0.0 - - nn.fit(a_in, a_out, a_mask) - v_out = nn.predict(a_in) + a_in = numpy.random.uniform(-1.0, +1.0, (8,16)) + a_out = numpy.random.randint(2, size=(8,1)).astype(numpy.float32) + a_mask = (0.0 + a_out).flatten() + a_out = -1.0 * 2.0 + a_out - # Make sure the examples weighted 1.0 have low error, 0.0 high error. - print(abs(a_out - v_out).T[0] * (1.0 - a_mask)) - assert_true((abs(a_out - v_out).T[0] * (1.0 - a_mask) > 5E-1).any()) - assert_true((abs(a_out - v_out).T[0] * a_mask < 5E-2).all()) \ No newline at end of file + self.check(a_in, a_out, a_mask) + + def test_MultipleOutputRandom(self): + a_in = numpy.random.uniform(-1.0, +1.0, (8,16)) + a_out = numpy.random.randint(2, size=(8,4)).astype(numpy.float32) + a_mask = numpy.random.randint(2, size=(8,)).astype(numpy.float32) + + self.check(a_in, a_out, a_mask) From bb2b9ae24b0d6702c7f6911e1e0f8819561c8c86 Mon Sep 17 00:00:00 2001 From: "Alex J. 
Champandard" Date: Sun, 22 Nov 2015 15:51:30 +0100 Subject: [PATCH 4/6] Classification tests for the data sample masking/weighting. Implementation seems to work reliably on these test problems! --- sknn/tests/test_data.py | 48 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/sknn/tests/test_data.py b/sknn/tests/test_data.py index f6ef02b..08347c9 100644 --- a/sknn/tests/test_data.py +++ b/sknn/tests/test_data.py @@ -1,11 +1,11 @@ import random import unittest -from nose.tools import (assert_in, assert_raises, assert_equals, assert_true) +from nose.tools import (assert_greater, assert_less, assert_raises, assert_equals, assert_true) import logging import numpy -from sknn.mlp import Regressor as MLPR +from sknn.mlp import Regressor as MLPR, Classifier as MLPC from sknn.mlp import Layer as L, Convolution as C @@ -91,7 +91,7 @@ def test_SetLayerParamsDict(self): assert_true((p[1].biases.astype('float32') == biases.astype('float32')).all()) -class TestMaskedDataset(unittest.TestCase): +class TestMaskedDataRegression(unittest.TestCase): def check(self, a_in, a_out, a_mask): nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50) @@ -131,3 +131,45 @@ def test_MultipleOutputRandom(self): a_mask = numpy.random.randint(2, size=(8,)).astype(numpy.float32) self.check(a_in, a_out, a_mask) + + +class TestMaskedDataClassification(unittest.TestCase): + + def check(self, a_in, a_out, a_mask, act='Softmax'): + nn = MLPC(layers=[L(act)], learning_rule='rmsprop', n_iter=100) + nn.fit(a_in, a_out, a_mask) + print(nn.classes_) + return nn.predict_proba(a_in) + + def test_TwoLabelsOne(self): + # Only one sample has the value 1 with weight 1.0, but all 0s are weighted 0.0. + a_in = numpy.random.uniform(-1.0, +1.0, (16,4)) + a_out = numpy.zeros((16,1), dtype=numpy.int32) + a_out[0] = 1 + a_mask = (0.0 + a_out).flatten() + + a_test = self.check(a_in, a_out, a_mask).mean(axis=0) + assert_greater(a_test[1], a_test[0] * 2.0) + + def test_TwoLabelsZero(self): + # Only one sample has the value 0 with weight 1.0, but all 1s are weighted 0.0. + a_in = numpy.random.uniform(-1.0, +1.0, (16,4)) + a_out = numpy.ones((16,1), dtype=numpy.int32) + a_out[-1] = 0 + a_mask = (1.0 - a_out).flatten() + + a_test = self.check(a_in, a_out, a_mask).mean(axis=0) + assert_greater(a_test[0], a_test[1] * 2.0) + + def test_FourLabels(self): + # Only multi-label sample has weight 1.0, the others have weight 0.0. Check probabilities! + chosen = random.randint(0,16) + a_in = numpy.random.uniform(-1.0, +1.0, (16,4)) + a_out = numpy.random.randint(2, size=(16,4)) + a_mask = numpy.zeros((16,), dtype=numpy.int32) + a_mask[chosen] = 1.0 + + a_test = self.check(a_in, a_out, a_mask, act="Sigmoid").mean(axis=0) + for i in range(a_out.shape[1]): + compare = assert_greater if a_out[chosen][i]==0 else assert_less + compare(a_test[i*2], a_test[i*2+1]) From bcf9b229fa9cc7fc6694f49db4097e9820b813a7 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 22 Nov 2015 16:09:56 +0100 Subject: [PATCH 5/6] Using vector for mask weights if they are set, otherwise a scalar that becomes 1.0. Simplifies the code. 
--- sknn/backend/lasagne/mlp.py | 10 +++++----- sknn/mlp.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sknn/backend/lasagne/mlp.py b/sknn/backend/lasagne/mlp.py index 3326f44..f8b78b2 100644 --- a/sknn/backend/lasagne/mlp.py +++ b/sknn/backend/lasagne/mlp.py @@ -138,10 +138,10 @@ def _create_layer(self, name, layer, network): num_units=layer.units, nonlinearity=self._get_activation(layer)) - def _create_mlp(self, X): + def _create_mlp(self, X, w=None): self.data_input = T.tensor4('X') if self.is_convolution else T.matrix('X') self.data_output = T.matrix('y') - self.data_mask = T.vector('m') + self.data_mask = T.vector('m') if w is not None else T.scalar('m') self.data_correct = T.matrix('yp') lasagne.random.get_rng().seed(self.random_state) @@ -187,12 +187,12 @@ def _create_mlp(self, X): self.network_output = lasagne.layers.get_output(network, deterministic=True) self.f = theano.function([self.data_input], self.network_output, allow_input_downcast=True) - def _initialize_impl(self, X, y=None): + def _initialize_impl(self, X, y=None, w=None): if self.is_convolution: X = numpy.transpose(X, (0, 3, 1, 2)) if self.mlp is None: - self._create_mlp(X) + self._create_mlp(X, w) # Can do partial initialization when predicting, no trainer needed. if y is None: @@ -254,7 +254,7 @@ def _batch_impl(self, X, y, w, processor, mode, output, shuffle): self._do_callback('on_batch_start', locals()) if mode == 'train': - loss += processor(Xb, yb, wb) + loss += processor(Xb, yb, wb if wb is not None else 1.0) else: loss += processor(Xb, yb) count += 1 diff --git a/sknn/mlp.py b/sknn/mlp.py index 90505cb..f632a41 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -31,14 +31,14 @@ class MultiLayerPerceptron(NeuralNetwork, sklearn.base.BaseEstimator): def _setup(self): pass - def _initialize(self, X, y=None): + def _initialize(self, X, y=None, w=None): assert not self.is_initialized,\ "This neural network has already been initialized." self._create_specs(X, y) backend.setup() self._backend = backend.MultiLayerPerceptronBackend(self) - return self._backend._initialize_impl(X, y) + return self._backend._initialize_impl(X, y, w) def _check_layer(self, layer, required, optional=[]): required.extend(['name', 'type']) @@ -203,7 +203,7 @@ def _fit(self, X, y, w=None): X, y = self._reshape(X, y) if not self.is_initialized: - X, y = self._initialize(X, y) + X, y = self._initialize(X, y, w) log.info("Training on dataset of {:,} samples with {:,} total size.".format(data_shape[0], data_size)) if data_shape[1:] != X.shape[1:]: From 859353733465a66635c68bfa80683f80d7aa5e4f Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 22 Nov 2015 17:07:04 +0100 Subject: [PATCH 6/6] Test was too strict to succeed deterministically. --- sknn/tests/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sknn/tests/test_data.py b/sknn/tests/test_data.py index 08347c9..1095d32 100644 --- a/sknn/tests/test_data.py +++ b/sknn/tests/test_data.py @@ -149,7 +149,7 @@ def test_TwoLabelsOne(self): a_mask = (0.0 + a_out).flatten() a_test = self.check(a_in, a_out, a_mask).mean(axis=0) - assert_greater(a_test[1], a_test[0] * 2.0) + assert_greater(a_test[1], a_test[0] * 1.5) def test_TwoLabelsZero(self): # Only one sample has the value 0 with weight 1.0, but all 1s are weighted 0.0. 
@@ -159,7 +159,7 @@ def test_TwoLabelsZero(self): a_mask = (1.0 - a_out).flatten() a_test = self.check(a_in, a_out, a_mask).mean(axis=0) - assert_greater(a_test[0], a_test[1] * 2.0) + assert_greater(a_test[0], a_test[1] * 1.5) def test_FourLabels(self): # Only multi-label sample has weight 1.0, the others have weight 0.0. Check probabilities!
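
Taken together, the series lets a caller pass an optional per-sample weight vector as the third argument to fit(). A minimal usage sketch in the spirit of the tests above (layer choice, sizes and the half-zero weight pattern are arbitrary):

    import numpy
    from sknn.mlp import Regressor as MLPR, Layer as L

    X = numpy.random.uniform(-1.0, +1.0, (64, 16))
    y = numpy.random.uniform(-1.0, +1.0, (64, 1))
    w = numpy.ones((64,))
    w[::2] = 0.0                 # these samples are ignored during training

    nn = MLPR(layers=[L("Linear")], learning_rule='adam', n_iter=50)
    nn.fit(X, y, w)              # omit w to train exactly as before
    predictions = nn.predict(X)

Classifier accepts the same optional third argument to fit(), as the masked classification tests above demonstrate.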