feat: initial version of nn module #9

Merged · 1 commit · Jun 7, 2022
Cargo.toml (5 additions, 1 deletion)
@@ -1,6 +1,6 @@
[package]
name = "mushin"
version = "0.4.0"
version = "0.5.0"
authors = ["Aitor Ruano <codearm@pm.me>"]
edition = "2021"
description = "Computational graphs with reverse automatic differentiation on the GPU"
@@ -15,5 +15,9 @@ license = "MIT/Apache-2.0"
maintenance = { status = "actively-developed" }
codecov = { repository = "c0dearm/mushin" }

[features]
default = ["nn"]
nn = []

[dependencies]
arrayfire = "3.8"
README.md (25 additions, 5 deletions)
@@ -31,11 +31,10 @@ Then, add `mushin` as one of your dependencies:

```toml
[dependencies]
mushin = "0.4"
mushin = "0.5"
```

The following is quite a self-explanatory example of the basic usage of **Mushin**; for more details, please check the crate [docs](https://docs.rs/mushin/latest/mushin/) or just ask us questions in the form of [issues](https://github.com/c0dearm/mushin/issues/new)! 😊

The following is quite a self-explanatory example of the basic usage of **Mushin** to build computation graphs and get the derivatives back:
```rust
use mushin as mu;
use mu::Tensor;
@@ -53,10 +52,31 @@ fn main() {
}
```

By default, this library enables the `nn` feature, which gives access to the `nn` module. It builds upon the auto-grad foundation of **Mushin** to deliver a set of **Deep Learning** utilities, such as activation functions, layers, losses and optimizers. If you don't need that part and are only interested in the pure auto-grad functionality of this library, the `nn` module can be disabled with `default-features = false` (see the `Cargo.toml` sketch after the example below). Here is a brief example of how the `nn` module works:

```rust
use mushin as mu;
use mu::nn::{layers::Linear, activations::relu, losses::mse, optimizers::SGD};

let x = mu::eye::<16, 1, 1, 3>(1.0).freeze();
let y = mu::eye::<16, 1, 1, 5>(3.0).freeze();

let linear = Linear::<16, 3, 5, _, _>::new();
let optim = SGD::new(&linear.parameters(), 0.01);

for _ in 0..5 {
let z = relu(&linear.forward(&x));
let loss = mse(&z, &y);

loss.backward();
optim.step();
loss.reset();
}
```
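
For reference, disabling the default `nn` feature from a downstream crate is a one-line change in its `Cargo.toml`; here is a minimal sketch (the exact version pin is illustrative):

```toml
[dependencies]
mushin = { version = "0.5", default-features = false }
```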

## Roadmap

- [ ] Add more operations
- [ ] Add a cargo feature for deep learning, which adds layers, optimizers, losses and activation functions
- [ ] Continue to add more deep learning utilities
- [ ] Add benchmarks

## Contributing
src/graph/node.rs (20 additions, 30 deletions)
@@ -9,23 +9,6 @@ static COUNTER: AtomicUsize = AtomicUsize::new(0);
#[allow(clippy::module_name_repetitions)]
pub type NodeId = usize;

/// Contains the data and gradients of a `Node`
pub struct Data {
data: Array<f32>,
grad: Array<f32>,
}

impl Data {
/// Creates new `Node` data with the gradients set to 0 as default
fn new(data: Array<f32>) -> Self {
let dims = data.dims();
Self {
data,
grad: constant(0.0, dims),
}
}
}

/// Represents the origin of a `Node`
enum Origin {
/// The node is a new variable declaration
@@ -39,7 +22,8 @@ enum Origin {
/// A `Node` holds a `Variable` tensor data (values and gradients) as
/// well as information about its `Origin`
pub struct Node {
data: RefCell<Data>,
data: RefCell<Array<f32>>,
grad: RefCell<Array<f32>>,
origin: Origin,
id: NodeId,
}
@@ -51,8 +35,11 @@ impl Node {
/// to be able to tell if two nodes (tensors) are the same when used in
/// different operations.
fn new(data: Array<f32>, origin: Origin) -> Self {
let dims = data.dims();

Self {
data: RefCell::new(Data::new(data)),
data: RefCell::new(data),
grad: RefCell::new(constant(0.0, dims)),
origin,
id: COUNTER.fetch_add(1, Ordering::Relaxed),
}
@@ -118,17 +105,22 @@ impl Node {

/// Returns the tensor data
pub(crate) fn data(&self) -> Ref<Array<f32>> {
Ref::map(self.data.borrow(), |d| &d.data)
self.data.borrow()
}

/// Returns a mutable reference to the tensor data
pub(crate) fn data_mut(&self) -> RefMut<Array<f32>> {
self.data.borrow_mut()
}

/// Returns the tensor gradients
pub(crate) fn grad(&self) -> Ref<Array<f32>> {
Ref::map(self.data.borrow(), |d| &d.grad)
self.grad.borrow()
}

/// Returns a mutable reference to the tensor gradients
pub(crate) fn grad_mut(&self) -> RefMut<Array<f32>> {
RefMut::map(self.data.borrow_mut(), |d| &mut d.grad)
self.grad.borrow_mut()
}

/// Computes the gradients of this node's ancestors by following the
@@ -161,6 +153,11 @@ impl Node {
pub(crate) const fn id(&self) -> NodeId {
self.id
}

/// Returns `true` if the node is a `Variable` declaration, `false` otherwise
pub(crate) const fn is_declaration(&self) -> bool {
matches!(self.origin, Origin::Declaration)
}
}

impl Drop for Node {
@@ -228,16 +225,9 @@ impl BinaryOp {

#[cfg(test)]
pub(crate) mod tests {
use super::{Data, Node, Origin};
use super::{Node, Origin};
use crate::tests::equal_arrays;

#[test]
fn new_data() {
let data = Data::new(arrayfire::constant!(2.0; 1,2,3,4));
assert!(equal_arrays(data.data, arrayfire::constant!(2.0; 1,2,3,4)));
assert!(equal_arrays(data.grad, arrayfire::constant!(0.0; 1,2,3,4)));
}

#[test]
fn new_node() {
let node = Node::new(arrayfire::constant!(2.0; 1,2,3,4), Origin::Declaration);
src/lib.rs (9 additions, 23 deletions)
@@ -42,6 +42,12 @@
//! gradients of all of its ancestor variables. By using the `grad()` method on any of them we can
//! now retrieve their gradients as new `Variable` tensors, which in turn can be used to compute
//! further gradients!
//!
//! It is quite possible that the reader is more interested in the Deep Learning utilities of this
//! library than in the raw auto-grad foundations.
//! By default, **Mushin** includes the [nn module](https://docs.rs/mushin/latest/mushin/nn/index.html),
//! which provides optimizers, activation functions, layers and losses, ready to use to build neural
//! network modules. Check out the module docs for instructions on how to use them.

#![deny(
unsafe_code,
@@ -55,6 +61,9 @@
clippy::missing_inline_in_public_items
)]

#[cfg(feature = "nn")]
pub mod nn;

mod graph;
mod tensor;

@@ -149,27 +158,4 @@ mod tests {
let x = mu::custom::<1, 1, 1, 1>(&[1.0]);
assert!(equal_arrays(x.data(), constant!(1.0;1,1,1,1)));
}

#[test]
fn perceptron_backprop() {
let x = mu::eye::<1, 1, 2, 3>(3.0).freeze();
let w = mu::fill::<1, 1, 3, 2>(2.0);
let b = mu::fill::<1, 1, 3, 3>(1.0);

for _ in 0..2 {
let z = w.mm(&x).add(&b);
assert_eq!(z.tape().nodes().len(), 4);
assert!(equal_arrays(
z.data(),
Array::new(
&[7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 1.0],
dim4!(3, 3, 1, 1),
),
));
z.backward();
assert!(equal_arrays(w.grad().data(), constant!(3.0; 3,2,1,1)));
assert!(equal_arrays(b.grad().data(), constant!(1.0; 3,3,1,1)));
z.reset();
}
}
}
src/nn/activations.rs (35 additions, 0 deletions)
@@ -0,0 +1,35 @@
use crate::tensor::{constant::Constant, params::DoubleParam, Tensor};

/// Performs the `ReLu` activation function on the given tensor
#[inline]
pub fn relu<const B: u64, const L: u64, const R: u64, const C: u64, X>(x: &X) -> X
where
X: Tensor<B, L, R, C> + DoubleParam<Constant<B, L, R, C>, X>,
{
x.maximum(&Constant::new(arrayfire::constant!(0.0; R,C,L,B)))
}

#[cfg(test)]
mod tests {
use super::relu;
use crate as mu;
use crate::tests::equal_arrays;
use crate::Tensor;

#[test]
fn relu_forward_backward() {
let x = mu::custom::<1, 1, 2, 3>(&[1.0, -1.0, -1.0, 1.0, -1.0, -1.0]);
let z = relu(&x);

assert!(equal_arrays(
z.data(),
arrayfire::identity(arrayfire::dim4!(2, 3, 1, 1))
));

z.backward();
assert!(equal_arrays(
x.grad().data(),
arrayfire::identity(arrayfire::dim4!(2, 3, 1, 1))
));
}
}
src/nn/layers.rs (96 additions, 0 deletions)
@@ -0,0 +1,96 @@
use crate as mu;
use crate::graph::node::Node;
use crate::tensor::{constant::Constant, params::DoubleParam, variable::Variable, Tensor};
use std::rc::Rc;

/// A Linear (perceptron) neural network layer with `B` batch size, `I` input size and `O` output size
pub struct Linear<const B: u64, const I: u64, const O: u64, W, P> {
weights: W,
bias: P,
}

impl<const B: u64, const I: u64, const O: u64, W, P> Linear<B, I, O, W, P>
where
W: Tensor<B, 1, I, O>,
P: Tensor<B, 1, 1, O>,
{
/// Given an input, computes the output
#[inline]
pub fn forward<X, Y>(&self, x: &X) -> Y
where
X: Tensor<B, 1, 1, I> + DoubleParam<W, Y>,
Y: Tensor<B, 1, 1, O> + DoubleParam<P, Y>,
{
x.mm(&self.weights).add(&self.bias)
}
}

impl<const B: u64, const I: u64, const O: u64>
Linear<B, I, O, Variable<B, 1, I, O>, Variable<B, 1, 1, O>>
{
/// Creates a new `Linear` layer with randomly initialized weights and bias (via `mu::randn`)
#[must_use]
#[inline]
pub fn new() -> Self {
Self {
weights: mu::randn(),
bias: mu::randn(),
}
}

/// Consumes this layer and returns a copy with constant parameters
#[must_use]
#[inline]
pub fn freeze(self) -> Linear<B, I, O, Constant<B, 1, I, O>, Constant<B, 1, 1, O>> {
Linear {
weights: self.weights.freeze(),
bias: self.bias.freeze(),
}
}

/// Returns the layer parameters as an array of computation graph nodes
#[must_use]
#[inline]
pub fn parameters(&self) -> [Rc<Node>; 2] {
[(&self.weights).into(), (&self.bias).into()]
}
}

impl<const B: u64, const I: u64, const O: u64>
Linear<B, I, O, Constant<B, 1, I, O>, Constant<B, 1, 1, O>>
{
/// Consumes this layer and returns a copy with trainable parameters
#[must_use]
#[inline]
pub fn unfreeze(self) -> Linear<B, I, O, Variable<B, 1, I, O>, Variable<B, 1, 1, O>> {
Linear {
weights: self.weights.unfreeze(),
bias: self.bias.unfreeze(),
}
}
}

impl<const B: u64, const I: u64, const O: u64> Default
for Linear<B, I, O, Variable<B, 1, I, O>, Variable<B, 1, 1, O>>
{
#[inline]
fn default() -> Self {
Self::new()
}
}

#[cfg(test)]
mod tests {
use super::Linear;
use crate as mu;
use crate::Tensor;

#[test]
fn linear_freeze_unfreeze() {
let linear = Linear::<1, 3, 5, _, _>::new().freeze().unfreeze();

let x = mu::fill::<1, 1, 1, 3>(2.0).freeze();
let z = linear.forward(&x);

assert_eq!(z.data().dims(), arrayfire::dim4!(1, 5, 1, 1));
}
}