diff --git a/README.md b/README.md
index 568d3178..e68cf407 100644
--- a/README.md
+++ b/README.md
@@ -199,13 +199,6 @@ backends that are available on your target platform
 to ensure optimal performance. For more information
 see the [benchmark results](https://github.com/jatinchowdhury18/RTNeural/actions?query=workflow%3ABench).
 
-RTNeural also has experimental support for Apple's
-[`Accelerate`](https://developer.apple.com/documentation/accelerate) framework (`-DRTNEURAL_ACCELERATE=ON`).
-Please note that the `Accelerate` backend can only be
-used when compiling for Apple devices, and does not
-currently support defining [compile-time inferencing
-engines](#compile-time-api).
-
 Note that you must abide by the licensing rules of whichever backend library you choose.
 
 ### Other configuration flags
diff --git a/RTNeural/CMakeLists.txt b/RTNeural/CMakeLists.txt
index deee0345..61f1876a 100755
--- a/RTNeural/CMakeLists.txt
+++ b/RTNeural/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_library(RTNeural STATIC
     activation/activation.h
-    activation/activation_accelerate.h
    activation/activation_eigen.h
     activation/activation_xsimd.h
     Model.h
@@ -16,13 +15,10 @@ add_library(RTNeural STATIC
     conv2d/conv2d_eigen.h
     conv2d/conv2d_eigen.tpp
     dense/dense.h
-    dense/dense_accelerate.h
     dense/dense_eigen.h
     dense/dense_xsimd.h
     gru/gru.h
     gru/gru.tpp
-    gru/gru_accelerate.h
-    gru/gru_accelerate.tpp
     gru/gru_eigen.h
     gru/gru_eigen.tpp
     gru/gru_xsimd.h
diff --git a/RTNeural/Layer.h b/RTNeural/Layer.h
index bf00d7e7..5ea2ef65 100644
--- a/RTNeural/Layer.h
+++ b/RTNeural/Layer.h
@@ -4,15 +4,6 @@
 #include
 #include
 
-#if RTNEURAL_USE_ACCELERATE
-// Dummy defines to make this include safe for JUCE and other libraries
-#define Point CarbonDummyPointName
-#define Component CarbonDummyCompName
-#include
-#undef Point
-#undef Component
-#endif
-
 namespace RTNeural
 {
 
diff --git a/RTNeural/ModelT.h b/RTNeural/ModelT.h
index 7fe11c1f..7376e6bf 100644
--- a/RTNeural/ModelT.h
+++ b/RTNeural/ModelT.h
@@ -2,10 +2,6 @@
 
 #include "model_loader.h"
 
-#define MODELT_AVAILABLE (!RTNEURAL_USE_ACCELERATE)
-
-#if MODELT_AVAILABLE
-
 namespace RTNeural
 {
 
@@ -580,5 +576,3 @@ class ModelT2D
 };
 #endif // RTNEURAL_USE_XSIMD
 } // namespace RTNeural
-
-#endif // MODELT_AVAILABLE
diff --git a/RTNeural/activation/activation.h b/RTNeural/activation/activation.h
index 2f2c04e3..91fb69d6 100644
--- a/RTNeural/activation/activation.h
+++ b/RTNeural/activation/activation.h
@@ -43,9 +43,6 @@ class Activation : public Layer
 #elif RTNEURAL_USE_XSIMD
 #include "activation_xsimd.h"
 
-#elif RTNEURAL_USE_ACCELERATE
-#include "activation_accelerate.h"
-
 #else
 #include "../common.h"
 #include
diff --git a/RTNeural/activation/activation_accelerate.h b/RTNeural/activation/activation_accelerate.h
deleted file mode 100644
index 0b37cfcb..00000000
--- a/RTNeural/activation/activation_accelerate.h
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef ACTIVATIONACCELERATE_H_INCLUDED
-#define ACTIVATIONACCELERATE_H_INCLUDED
-
-#include "../common.h"
-
-namespace RTNeural
-{
-
-/** Dynamic implementation of a tanh activation layer. */
-template
-class TanhActivation : public Activation
-{
-public:
-    /** Constructs a tanh activation layer for a given size. */
-    TanhActivation(int size)
-        : Activation(size, {}, "tanh")
-    {
-    }
-
-    TanhActivation(std::initializer_list sizes)
-        : TanhActivation(*sizes.begin())
-    {
-    }
-
-    /** Performs forward propagation for tanh activation. */
-    inline void forward(const T* input, T* out) noexcept override
-    {
-        forward_internal(input, out);
-    }
-
-private:
-    template
-    inline typename std::enable_if::value>::type
-    forward_internal(const float* input, float* out) noexcept
-    {
-        const auto dim_int = static_cast(Layer::in_size);
-        vvtanhf(out, input, &dim_int);
-    }
-
-    template
-    inline typename std::enable_if::value>::type
-    forward_internal(const double* input, double* out) noexcept
-    {
-        const auto dim_int = static_cast(Layer::in_size);
-        vvtanh(out, input, &dim_int);
-    }
-};
-
-/** Dynamic implementation of a ReLU activation layer. */
-template
-class ReLuActivation : public Activation
-{
-public:
-    /** Constructs a ReLU activation layer for a given size. */
-    ReLuActivation(int size)
-        : Activation(size, {}, "relu")
-    {
-        zeros.resize(size, (T)0);
-    }
-
-    ReLuActivation(std::initializer_list sizes)
-        : ReLuActivation(*sizes.begin())
-    {
-    }
-
-    /** Performs forward propagation for ReLU activation. */
-    inline void forward(const T* input, T* out) noexcept override
-    {
-        forward_internal(input, out);
-    }
-
-private:
-    template
-    inline typename std::enable_if::value>::type
-    forward_internal(const float* input, float* out) noexcept
-    {
-        vDSP_vmax(input, 1, zeros.data(), 1, out, 1, Layer::in_size);
-    }
-
-    template
-    inline typename std::enable_if::value>::type
-    forward_internal(const double* input, double* out) noexcept
-    {
-        vDSP_vmaxD(input, 1, zeros.data(), 1, out, 1, Layer::in_size);
-    }
-
-    std::vector zeros;
-};
-
-/** Dynamic implementation of a sigmoid activation layer. */
-template
-class SigmoidActivation : public Activation
-{
-public:
-    /** Constructs a sigmoid activation layer for a given size. */
-    SigmoidActivation(int size)
-        : Activation(size, {}, "sigmoid")
-    {
-    }
-
-    SigmoidActivation(std::initializer_list sizes)
-        : SigmoidActivation(*sizes.begin())
-    {
-    }
-
-    /** Performs forward propagation for sigmoid activation. */
-    inline void forward(const T* input, T* out) noexcept override
-    {
-        sigmoid(input, out, Layer::in_size);
-    }
-};
-
-/** Dynamic implementation of a softmax activation layer. */
-template
-class SoftmaxActivation : public Activation
-{
-public:
-    /** Constructs a softmax activation layer for a given size. */
-    SoftmaxActivation(int size)
-        : Activation(size, {}, "softmax")
-    {
-    }
-
-    SoftmaxActivation(std::initializer_list sizes)
-        : SoftmaxActivation(*sizes.begin())
-    {
-    }
-
-    /** Performs forward propagation for softmax activation. */
-    inline void forward(const T* input, T* out) noexcept override
-    {
-        softmax(input, out, Layer::in_size);
-    }
-};
-
-} // namespace RTNeural
-
-#endif // ACTIVATIONACCELERATE_H_INCLUDED
diff --git a/RTNeural/batchnorm/batchnorm.tpp b/RTNeural/batchnorm/batchnorm.tpp
index 5b97ef9d..746d3acf 100644
--- a/RTNeural/batchnorm/batchnorm.tpp
+++ b/RTNeural/batchnorm/batchnorm.tpp
@@ -2,7 +2,7 @@
 namespace RTNeural
 {
 
-#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD && !RTNEURAL_USE_ACCELERATE
+#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD
 
 template
 BatchNorm1DLayer::BatchNorm1DLayer(int size)
diff --git a/RTNeural/batchnorm/batchnorm2d.tpp b/RTNeural/batchnorm/batchnorm2d.tpp
index ed7661dc..4cae8531 100644
--- a/RTNeural/batchnorm/batchnorm2d.tpp
+++ b/RTNeural/batchnorm/batchnorm2d.tpp
@@ -1,6 +1,6 @@
 #include "batchnorm2d.h"
 
-#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD && !RTNEURAL_USE_ACCELERATE
+#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD
 
 namespace RTNeural
 {
diff --git a/RTNeural/common.h b/RTNeural/common.h
index 4d42c394..431d493c 100644
--- a/RTNeural/common.h
+++ b/RTNeural/common.h
@@ -322,58 +322,6 @@ static inline void fast_tanh(const T* in, T* out, int dim) noexcept
 
 } // namespace RTNeural
 
-#elif RTNEURAL_USE_ACCELERATE
-#include
-
-namespace RTNeural
-{
-
-static inline void sigmoid(const float* in, float* out, int dim) noexcept
-{
-    constexpr float one = 1.0f;
-    constexpr float neg_one = -1.0f;
-    const auto dim_int = static_cast(dim);
-
-    vDSP_vsmul(in, 1, &neg_one, out, 1, dim);
-    vvexpf(out, out, &dim_int);
-    vDSP_vsadd(out, 1, &one, out, 1, dim);
-    vvrecf(out, out, &dim_int);
-}
-
-static inline void sigmoid(const double* in, double* out, int dim) noexcept
-{
-    constexpr double one = 1.0;
-    constexpr double neg_one = -1.0;
-    const auto dim_int = static_cast(dim);
-
-    vDSP_vsmulD(in, 1, &neg_one, out, 1, dim);
-    vvexp(out, out, &dim_int);
-    vDSP_vsaddD(out, 1, &one, out, 1, dim);
-    vvrec(out, out, &dim_int);
-}
-
-static inline void softmax(const float* in, float* out, int dim) noexcept
-{
-    const auto dim_int = static_cast(dim);
-    float exp_sum;
-
-    vvexpf(out, in, &dim_int);
-    vDSP_sve(out, 1, &exp_sum, dim);
-    vDSP_vsdiv(out, 1, &exp_sum, out, 1, dim);
-}
-
-static inline void softmax(const double* in, double* out, int dim) noexcept
-{
-    const auto dim_int = static_cast(dim);
-    double exp_sum;
-
-    vvexp(out, in, &dim_int);
-    vDSP_sveD(out, 1, &exp_sum, dim);
-    vDSP_vsdivD(out, 1, &exp_sum, out, 1, dim);
-}
-
-} // namespace RTNeural
-
 #else // STL backend
 #include
 #include
diff --git a/RTNeural/conv1d/conv1d.h b/RTNeural/conv1d/conv1d.h
index 64060819..ed513aa8 100644
--- a/RTNeural/conv1d/conv1d.h
+++ b/RTNeural/conv1d/conv1d.h
@@ -7,9 +7,6 @@
 #elif RTNEURAL_USE_XSIMD
 #include "conv1d_xsimd.h"
 #include "conv1d_xsimd.tpp"
-#elif RTNEURAL_USE_ACCELERATE
-#include "conv1d_accelerate.h"
-#include "conv1d_accelerate.tpp"
 #else
 #include "../Layer.h"
 #include "../common.h"
diff --git a/RTNeural/conv1d/conv1d.tpp b/RTNeural/conv1d/conv1d.tpp
index 88d79022..147b3d92 100644
--- a/RTNeural/conv1d/conv1d.tpp
+++ b/RTNeural/conv1d/conv1d.tpp
@@ -3,7 +3,7 @@
 namespace RTNeural
 {
 
-#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD && !RTNEURAL_USE_ACCELERATE
+#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD
 
 template
 Conv1D::Conv1D(int in_size, int out_size, int kernel_size, int dilation)
diff --git a/RTNeural/conv1d/conv1d_accelerate.h b/RTNeural/conv1d/conv1d_accelerate.h
deleted file mode 100644
index 1cec7e85..00000000
--- a/RTNeural/conv1d/conv1d_accelerate.h
+++ /dev/null
@@
-1,105 +0,0 @@ -#ifndef CONV1DACCELERATE_H_INCLUDED -#define CONV1DACCELERATE_H_INCLUDED - -#include "../Layer.h" -#include "../common.h" -#include - -namespace RTNeural -{ - -/** Dynamic implementation of a 1-dimensional convolution layer. */ -template -class Conv1D : public Layer -{ -public: - /** Constructs a convolution layer for the given dimensions. */ - Conv1D(int in_size, int out_size, int kernel_size, int dilation); - Conv1D(std::initializer_list sizes); - Conv1D(const Conv1D& other); - Conv1D& operator=(const Conv1D& other); - virtual ~Conv1D(); - - /** Resets the layer state. */ - void reset() override; - - /** Returns the name of this layer. */ - std::string getName() const noexcept override { return "conv1d"; } - - /** Performs forward propagation for this layer. */ - virtual inline void forward(const T* input, T* h) noexcept override - { - // @TODO: vectorize this! - for(int k = 0; k < Layer::in_size; ++k) - { - state[k][state_ptr] = input[k]; - state[k][state_ptr + state_size] = input[k]; - } - - conv_internal(h); - - state_ptr = (state_ptr == 0 ? state_size - 1 : state_ptr - 1); // iterate state pointer in reverse - } - - /** Sets the layer weights. */ - void setWeights(const std::vector>>& weights); - - /** Sets the layer biases. */ - void setBias(const std::vector& biasVals); - - /** Returns the size of the convolution kernel. */ - int getKernelSize() const noexcept { return kernel_size; } - - /** Returns the convolution dilation rate. */ - int getDilationRate() const noexcept { return dilation_rate; } - -private: - template - inline typename std::enable_if::value>::type - conv_internal(float* h) noexcept - { - float dotpr_out; - for(int i = 0; i < Layer::out_size; ++i) - { - h[i] = (T)0; - for(int k = 0; k < Layer::in_size; ++k) - { - vDSP_dotpr(&state[k][state_ptr], 1, kernelWeights[i][k], 1, &dotpr_out, state_size); - h[i] += dotpr_out; - } - } - - vDSP_vadd(h, 1, bias, 1, h, 1, Layer::out_size); - } - - template - inline typename std::enable_if::value>::type - conv_internal(double* h) noexcept - { - double dotpr_out; - for(int i = 0; i < Layer::out_size; ++i) - { - h[i] = (T)0; - for(int k = 0; k < Layer::in_size; ++k) - { - vDSP_dotprD(&state[k][state_ptr], 1, kernelWeights[i][k], 1, &dotpr_out, state_size); - h[i] += dotpr_out; - } - } - - vDSP_vaddD(h, 1, bias, 1, h, 1, Layer::out_size); - } - - const int dilation_rate; - const int kernel_size; - const int state_size; - - T*** kernelWeights; - T* bias; - T** state; - int state_ptr = 0; -}; - -} // namespace RTNeural - -#endif // CONV1DACCELERATE_H_INCLUDED diff --git a/RTNeural/conv1d/conv1d_accelerate.tpp b/RTNeural/conv1d/conv1d_accelerate.tpp deleted file mode 100644 index 33be6928..00000000 --- a/RTNeural/conv1d/conv1d_accelerate.tpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "conv1d_accelerate.h" - -namespace RTNeural -{ - -template -Conv1D::Conv1D(int in_size, int out_size, int kernel_size, int dilation) - : Layer(in_size, out_size) - , dilation_rate(dilation) - , kernel_size(kernel_size) - , state_size(kernel_size * dilation) -{ - kernelWeights = new T**[out_size]; - for(int i = 0; i < out_size; ++i) - { - kernelWeights[i] = new T*[in_size]; - for(int k = 0; k < in_size; ++k) - { - kernelWeights[i][k] = new T[state_size]; - std::fill(kernelWeights[i][k], &kernelWeights[i][k][state_size], (T)0); - } - } - - bias = new T[out_size]; - - state = new T*[in_size]; - for(int k = 0; k < in_size; ++k) - state[k] = new T[2 * state_size]; -} - -template -Conv1D::Conv1D(std::initializer_list sizes) - : 
Conv1D(*sizes.begin(), *(sizes.begin() + 1), *(sizes.begin() + 2), *(sizes.begin() + 3)) -{ -} - -template -Conv1D::Conv1D(const Conv1D& other) - : Conv1D(other.in_size, other.out_size, other.kernel_size, other.dilation_rate) -{ -} - -template -Conv1D& Conv1D::operator=(const Conv1D& other) -{ - return *this = Conv1D(other); -} - -template -Conv1D::~Conv1D() -{ - for(int i = 0; i < Layer::out_size; ++i) - { - for(int k = 0; k < Layer::in_size; ++k) - delete[] kernelWeights[i][k]; - - delete[] kernelWeights[i]; - } - - delete[] kernelWeights; - delete[] bias; - - for(int k = 0; k < Layer::in_size; ++k) - delete[] state[k]; - delete[] state; -} - -template -void Conv1D::reset() -{ - state_ptr = 0; - for(int k = 0; k < Layer::in_size; ++k) - std::fill(state[k], &state[k][2 * state_size], (T)0); -} - -template -void Conv1D::setWeights(const std::vector>>& weights) -{ - for(int i = 0; i < Layer::out_size; ++i) - for(int k = 0; k < Layer::in_size; ++k) - for(int j = 0; j < kernel_size; ++j) - kernelWeights[i][k][j * dilation_rate] = weights[i][k][j]; -} - -template -void Conv1D::setBias(const std::vector& biasVals) -{ - for(int i = 0; i < Layer::out_size; ++i) - bias[i] = biasVals[i]; -} - -} // namespace RTNeural diff --git a/RTNeural/conv2d/conv2d.tpp b/RTNeural/conv2d/conv2d.tpp index 77d224b9..cb03fc03 100644 --- a/RTNeural/conv2d/conv2d.tpp +++ b/RTNeural/conv2d/conv2d.tpp @@ -1,6 +1,6 @@ #include "conv2d.h" -#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD && !RTNEURAL_USE_ACCELERATE +#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD namespace RTNeural { diff --git a/RTNeural/dense/dense.h b/RTNeural/dense/dense.h index 5ea6a656..19d6b9ce 100644 --- a/RTNeural/dense/dense.h +++ b/RTNeural/dense/dense.h @@ -9,8 +9,6 @@ #include "dense_eigen.h" #elif RTNEURAL_USE_XSIMD #include "dense_xsimd.h" -#elif RTNEURAL_USE_ACCELERATE -#include "dense_accelerate.h" #else #include "../Layer.h" diff --git a/RTNeural/dense/dense_accelerate.h b/RTNeural/dense/dense_accelerate.h deleted file mode 100644 index 77d1333a..00000000 --- a/RTNeural/dense/dense_accelerate.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef DENSEACCELERATE_H_INCLUDED -#define DENSEACCELERATE_H_INCLUDED - -#include "../Layer.h" -#include - -namespace RTNeural -{ - -/** Dynamic implementation of a fully-connected (dense) layer. */ -template -class Dense : public Layer -{ -public: - /** Constructs a dense layer for a given input and output size. */ - Dense(int in_size, int out_size) - : Layer(in_size, out_size) - { - sums = new T[out_size]; - bias = new T[out_size]; - weights = new T*[out_size]; - for(int i = 0; i < out_size; ++i) - weights[i] = new T[in_size]; - } - - Dense(std::initializer_list sizes) - : Dense(*sizes.begin(), *(sizes.begin() + 1)) - { - } - - Dense(const Dense& other) - : Dense(other.in_size, other.out_size) - { - } - - Dense& operator=(const Dense& other) - { - return *this = Dense(other); - } - - virtual ~Dense() - { - delete[] bias; - delete[] sums; - for(int i = 0; i < Layer::out_size; ++i) - delete[] weights[i]; - delete[] weights; - } - - /** Returns the name of this layer. */ - std::string getName() const noexcept override { return "dense"; } - - /** Performs forward propagation for this layer. */ - inline void forward(const T* input, T* out) noexcept override - { - forward_internal(input, out); - } - - /** Sets the layer weights from a given vector. 
*/ - void setWeights(const std::vector>& newWeights) - { - for(int i = 0; i < Layer::out_size; ++i) - for(int k = 0; k < Layer::in_size; ++k) - weights[i][k] = newWeights[i][k]; - } - - /** Sets the layer weights from a given array. */ - void setWeights(T** newWeights) - { - for(int i = 0; i < Layer::out_size; ++i) - for(int k = 0; k < Layer::in_size; ++k) - weights[i][k] = newWeights[i][k]; - } - - /** Sets the layer bias from a given array. */ - void setBias(const T* b) - { - for(int i = 0; i < Layer::out_size; ++i) - bias[i] = b[i]; - } - - /** Returns the weights value at the given indices. */ - T getWeight(int i, int k) const noexcept { return weights[i][k]; } - - /** Returns the bias value at the given index. */ - T getBias(int i) const noexcept { return bias[i]; } - -private: - template - inline typename std::enable_if::value>::type - forward_internal(const float* input, float* out) noexcept - { - for(int l = 0; l < Layer::out_size; ++l) - vDSP_dotpr(input, 1, weights[l], 1, &sums[l], Layer::in_size); - - vDSP_vadd(sums, 1, bias, 1, out, 1, Layer::out_size); - } - - template - inline typename std::enable_if::value>::type - forward_internal(const double* input, double* out) noexcept - { - for(int l = 0; l < Layer::out_size; ++l) - vDSP_dotprD(input, 1, weights[l], 1, &sums[l], Layer::in_size); - - vDSP_vaddD(sums, 1, bias, 1, out, 1, Layer::out_size); - } - - T* bias; - T** weights; - T* sums; -}; - -} // namespace RTNeural - -#endif // DENSEACCELERATE_H_INCLUDED diff --git a/RTNeural/dense/dense_eigen.h b/RTNeural/dense/dense_eigen.h index b3c5c8e4..09fb4bbe 100644 --- a/RTNeural/dense/dense_eigen.h +++ b/RTNeural/dense/dense_eigen.h @@ -50,7 +50,7 @@ class Dense : public Layer /** Performs forward propagation for this layer. */ inline void forward(const T* input, T* out) noexcept override { - for (int i = 0; i < Layer::in_size; ++i) + for(int i = 0; i < Layer::in_size; ++i) inVec(i, 0) = input[i]; /** @@ -59,7 +59,7 @@ class Dense : public Layer */ outVec.noalias() = weights * inVec; - for (int i = 0; i < Layer::out_size; ++i) + for(int i = 0; i < Layer::out_size; ++i) out[i] = outVec(i, 0); } @@ -149,7 +149,7 @@ class DenseT /** Performs forward propagation for this layer. */ inline void forward(const Eigen::Matrix& ins) noexcept { - for (int i = 0; i < in_size; ++i) + for(int i = 0; i < in_size; ++i) ins_internal(i, 0) = ins(i, 0); /** diff --git a/RTNeural/gru/gru.h b/RTNeural/gru/gru.h index 29d1394d..1d1bcb96 100644 --- a/RTNeural/gru/gru.h +++ b/RTNeural/gru/gru.h @@ -9,9 +9,6 @@ #elif RTNEURAL_USE_XSIMD #include "gru_xsimd.h" #include "gru_xsimd.tpp" -#elif RTNEURAL_USE_ACCELERATE -#include "gru_accelerate.h" -#include "gru_accelerate.tpp" #else #include "../Layer.h" #include "../common.h" diff --git a/RTNeural/gru/gru.tpp b/RTNeural/gru/gru.tpp index ed0fba64..98cbcebd 100644 --- a/RTNeural/gru/gru.tpp +++ b/RTNeural/gru/gru.tpp @@ -3,7 +3,7 @@ namespace RTNeural { -#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD && !RTNEURAL_USE_ACCELERATE +#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD template GRULayer::GRULayer(int in_size, int out_size) : Layer(in_size, out_size) diff --git a/RTNeural/gru/gru_accelerate.h b/RTNeural/gru/gru_accelerate.h deleted file mode 100644 index 4abfc1e4..00000000 --- a/RTNeural/gru/gru_accelerate.h +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef GRUACCELERATE_H_INCLUDED -#define GRUACCELERATE_H_INCLUDED - -#include "../Layer.h" -#include "../common.h" -#include - -namespace RTNeural -{ - -/** Dynamic implementation of a gated recurrent unit (GRU) layer. 
*/ -template -class GRULayer : public Layer -{ -public: - /** Constructs a GRU layer for a given input and output size. */ - GRULayer(int in_size, int out_size); - GRULayer(std::initializer_list sizes); - GRULayer(const GRULayer& other); - GRULayer& operator=(const GRULayer& other); - virtual ~GRULayer(); - - /** Resets the state of the GRU. */ - void reset() override { std::fill(ht1, ht1 + Layer::out_size, (T)0); } - - /** Returns the name of this layer. */ - std::string getName() const noexcept override { return "gru"; } - - /** Performs forward propagation for this layer. */ - virtual inline void forward(const T* input, T* h) noexcept override - { - forward_internal(input, h); - } - - /** Sets the layer kernel weights. */ - void setWVals(T** wVals); - - /** Sets the layer recurrent weights. */ - void setUVals(T** uVals); - - /** Sets the layer biases. */ - void setBVals(T** bVals); - - /** Sets the layer kernel weights. */ - void setWVals(const std::vector>& wVals); - - /** Sets the layer recurrent weights. */ - void setUVals(const std::vector>& uVals); - - /** Sets the layer biases. */ - void setBVals(const std::vector>& bVals); - - T getWVal(int i, int k) const noexcept; - T getUVal(int i, int k) const noexcept; - T getBVal(int i, int k) const noexcept; - -protected: - template - inline typename std::enable_if::value>::type - forward_internal(const float* input, float* h) noexcept - { - float dotpr_out; - for(int i = 0; i < Layer::out_size; ++i) - { - vDSP_dotpr(zWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - zVec[i] = dotpr_out; - vDSP_dotpr(zWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - zVec[i] += dotpr_out; - - vDSP_dotpr(rWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - rVec[i] = dotpr_out; - vDSP_dotpr(rWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - rVec[i] += dotpr_out; - - vDSP_dotpr(cWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - cVec[i] = dotpr_out; - vDSP_dotpr(cWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - cTmp[i] = dotpr_out; - } - - vDSP_vadd(zVec, 1, zWeights.b[0], 1, zVec, 1, Layer::out_size); - vDSP_vadd(zVec, 1, zWeights.b[1], 1, zVec, 1, Layer::out_size); - sigmoid(zVec, zVec, Layer::out_size); - - vDSP_vadd(rVec, 1, rWeights.b[0], 1, rVec, 1, Layer::out_size); - vDSP_vadd(rVec, 1, rWeights.b[1], 1, rVec, 1, Layer::out_size); - sigmoid(rVec, rVec, Layer::out_size); - - vDSP_vadd(cTmp, 1, cWeights.b[1], 1, cTmp, 1, Layer::out_size); - vDSP_vmul(cTmp, 1, rVec, 1, cTmp, 1, Layer::out_size); - vDSP_vadd(cTmp, 1, cVec, 1, cVec, 1, Layer::out_size); - vDSP_vadd(cVec, 1, cWeights.b[0], 1, cVec, 1, Layer::out_size); - const auto dim_int = static_cast(Layer::out_size); - vvtanhf(cVec, cVec, &dim_int); - - vDSP_vsub(zVec, 1, ones, 1, h, 1, Layer::out_size); - vDSP_vmul(h, 1, cVec, 1, h, 1, Layer::out_size); - vDSP_vmul(zVec, 1, ht1, 1, ht1, 1, Layer::out_size); - vDSP_vadd(h, 1, ht1, 1, h, 1, Layer::out_size); - - cblas_scopy((int)Layer::out_size, h, 1, ht1, 1); - } - - template - inline typename std::enable_if::value>::type - forward_internal(const double* input, double* h) noexcept - { - double dotpr_out; - for(int i = 0; i < Layer::out_size; ++i) - { - vDSP_dotprD(zWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - zVec[i] = dotpr_out; - vDSP_dotprD(zWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - zVec[i] += dotpr_out; - - vDSP_dotprD(rWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - rVec[i] = dotpr_out; - vDSP_dotprD(rWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - rVec[i] += 
dotpr_out; - - vDSP_dotprD(cWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - cVec[i] = dotpr_out; - vDSP_dotprD(cWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - cTmp[i] = dotpr_out; - } - - vDSP_vaddD(zVec, 1, zWeights.b[0], 1, zVec, 1, Layer::out_size); - vDSP_vaddD(zVec, 1, zWeights.b[1], 1, zVec, 1, Layer::out_size); - sigmoid(zVec, zVec, Layer::out_size); - - vDSP_vaddD(rVec, 1, rWeights.b[0], 1, rVec, 1, Layer::out_size); - vDSP_vaddD(rVec, 1, rWeights.b[1], 1, rVec, 1, Layer::out_size); - sigmoid(rVec, rVec, Layer::out_size); - - vDSP_vaddD(cTmp, 1, cWeights.b[1], 1, cTmp, 1, Layer::out_size); - vDSP_vmulD(cTmp, 1, rVec, 1, cTmp, 1, Layer::out_size); - vDSP_vaddD(cTmp, 1, cVec, 1, cVec, 1, Layer::out_size); - vDSP_vaddD(cVec, 1, cWeights.b[0], 1, cVec, 1, Layer::out_size); - const auto dim_int = static_cast(Layer::out_size); - vvtanh(cVec, cVec, &dim_int); - - vDSP_vsubD(zVec, 1, ones, 1, h, 1, Layer::out_size); - vDSP_vmulD(h, 1, cVec, 1, h, 1, Layer::out_size); - vDSP_vmulD(zVec, 1, ht1, 1, ht1, 1, Layer::out_size); - vDSP_vaddD(h, 1, ht1, 1, h, 1, Layer::out_size); - - cblas_dcopy((int)Layer::out_size, h, 1, ht1, 1); - } - - T* ht1; - - struct WeightSet - { - WeightSet(int in_size, int out_size); - ~WeightSet(); - - T** W; - T** U; - T* b[2]; - const int out_size; - }; - - WeightSet zWeights; - WeightSet rWeights; - WeightSet cWeights; - - T* zVec; - T* rVec; - T* cVec; - T* cTmp; - - T* ones; -}; - -} // namespace RTNeural - -#endif // GRUACCELERATE_H_INCLUDED diff --git a/RTNeural/gru/gru_accelerate.tpp b/RTNeural/gru/gru_accelerate.tpp deleted file mode 100644 index aaf30e37..00000000 --- a/RTNeural/gru/gru_accelerate.tpp +++ /dev/null @@ -1,223 +0,0 @@ -#include "gru_accelerate.h" - -namespace RTNeural -{ - -template -GRULayer::GRULayer(int in_size, int out_size) - : Layer(in_size, out_size) - , zWeights(in_size, out_size) - , rWeights(in_size, out_size) - , cWeights(in_size, out_size) -{ - ht1 = new T[out_size]; - zVec = new T[out_size]; - rVec = new T[out_size]; - cVec = new T[out_size]; - cTmp = new T[out_size]; - - ones = new T[out_size]; - std::fill(ones, &ones[out_size], (T)1); -} - -template -GRULayer::GRULayer(std::initializer_list sizes) - : GRULayer(*sizes.begin(), *(sizes.begin() + 1)) -{ -} - -template -GRULayer::GRULayer(const GRULayer& other) - : GRULayer(other.in_size, other.out_size) -{ -} - -template -GRULayer& GRULayer::operator=(const GRULayer& other) -{ - return *this = GRULayer(other); -} - -template -GRULayer::~GRULayer() -{ - delete[] ht1; - delete[] zVec; - delete[] rVec; - delete[] cVec; - delete[] cTmp; - - delete[] ones; -} - -template -GRULayer::WeightSet::WeightSet(int in_size, int out_size) - : out_size(out_size) -{ - W = new T*[out_size]; - U = new T*[out_size]; - b[0] = new T[out_size]; - b[1] = new T[out_size]; - - for(int i = 0; i < out_size; ++i) - { - W[i] = new T[in_size]; - U[i] = new T[out_size]; - } -} - -template -GRULayer::WeightSet::~WeightSet() -{ - delete[] b[0]; - delete[] b[1]; - - for(int i = 0; i < out_size; ++i) - { - delete[] W[i]; - delete[] U[i]; - } - - delete[] W; - delete[] U; -} - -template -void GRULayer::setWVals(const std::vector>& wVals) -{ - for(int i = 0; i < Layer::in_size; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - zWeights.W[k][i] = wVals[i][k]; - rWeights.W[k][i] = wVals[i][k + Layer::out_size]; - cWeights.W[k][i] = wVals[i][k + Layer::out_size * 2]; - } - } -} - -template -void GRULayer::setWVals(T** wVals) -{ - for(int i = 0; i < Layer::in_size; ++i) - { - for(int k = 0; k < 
Layer::out_size; ++k) - { - zWeights.W[k][i] = wVals[i][k]; - rWeights.W[k][i] = wVals[i][k + Layer::out_size]; - cWeights.W[k][i] = wVals[i][k + Layer::out_size * 2]; - } - } -} - -template -void GRULayer::setUVals(const std::vector>& uVals) -{ - for(int i = 0; i < Layer::out_size; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - zWeights.U[k][i] = uVals[i][k]; - rWeights.U[k][i] = uVals[i][k + Layer::out_size]; - cWeights.U[k][i] = uVals[i][k + Layer::out_size * 2]; - } - } -} - -template -void GRULayer::setUVals(T** uVals) -{ - for(int i = 0; i < Layer::out_size; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - zWeights.U[k][i] = uVals[i][k]; - rWeights.U[k][i] = uVals[i][k + Layer::out_size]; - cWeights.U[k][i] = uVals[i][k + Layer::out_size * 2]; - } - } -} - -template -void GRULayer::setBVals(const std::vector>& bVals) -{ - for(int i = 0; i < 2; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - zWeights.b[i][k] = bVals[i][k]; - rWeights.b[i][k] = bVals[i][k + Layer::out_size]; - cWeights.b[i][k] = bVals[i][k + Layer::out_size * 2]; - } - } -} - -template -void GRULayer::setBVals(T** bVals) -{ - for(int i = 0; i < 2; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - zWeights.b[i][k] = bVals[i][k]; - rWeights.b[i][k] = bVals[i][k + Layer::out_size]; - cWeights.b[i][k] = bVals[i][k + Layer::out_size * 2]; - } - } -} - -template -T GRULayer::getWVal(int i, int k) const noexcept -{ - T** set = zWeights.W; - if(k > 2 * Layer::out_size) - { - k -= 2 * Layer::out_size; - set = cWeights.W; - } - else if(k > Layer::out_size) - { - k -= Layer::out_size; - set = rWeights.W; - } - - return set[i][k]; -} - -template -T GRULayer::getUVal(int i, int k) const noexcept -{ - T** set = zWeights.U; - if(k > 2 * Layer::out_size) - { - k -= 2 * Layer::out_size; - set = cWeights.U; - } - else if(k > Layer::out_size) - { - k -= Layer::out_size; - set = rWeights.U; - } - - return set[i][k]; -} - -template -T GRULayer::getBVal(int i, int k) const noexcept -{ - T** set = zWeights.b; - if(k > 2 * Layer::out_size) - { - k -= 2 * Layer::out_size; - set = cWeights.b; - } - else if(k > Layer::out_size) - { - k -= Layer::out_size; - set = rWeights.b; - } - - return set[i][k]; -} - -} // namespace RTNeural diff --git a/RTNeural/gru/gru_eigen.h b/RTNeural/gru/gru_eigen.h index 619e25ba..38f27e7c 100644 --- a/RTNeural/gru/gru_eigen.h +++ b/RTNeural/gru/gru_eigen.h @@ -39,7 +39,7 @@ class GRULayer : public Layer /** Performs forward propagation for this layer. */ inline void forward(const T* input, T* h) noexcept override { - for (int i = 0; i < Layer::in_size; ++i) + for(int i = 0; i < Layer::in_size; ++i) { extendedInVec(i) = input[i]; } @@ -60,17 +60,14 @@ class GRULayer : public Layer * gamma = sigmoid( | z | = sigmoid(alpha[0 : 2*out_sizet] + beta[0 : 2*out_sizet]) * | r | ) */ - gammaVec.noalias() = alphaVec.segment(0, 2 * Layer::out_size) + - betaVec.segment(0, 2 * Layer::out_size); + gammaVec.noalias() = alphaVec.segment(0, 2 * Layer::out_size) + betaVec.segment(0, 2 * Layer::out_size); sigmoid(gammaVec); /** * c = tanh( alpha[2*out_sizet : 3*out_sizet] + r.cwiseProduct(beta[2*out_sizet : 3*out_sizet] ) * i.e. 
c = tanh( Wc * input + bc[0] + r.cwiseProduct(Uc * h(t-1) + bc[1]) ) */ - cVec.noalias() = alphaVec.segment(2 * Layer::out_size, Layer::out_size) + - gammaVec.segment(Layer::out_size, Layer::out_size).cwiseProduct( - betaVec.segment(2 * Layer::out_size, Layer::out_size)); + cVec.noalias() = alphaVec.segment(2 * Layer::out_size, Layer::out_size) + gammaVec.segment(Layer::out_size, Layer::out_size).cwiseProduct(betaVec.segment(2 * Layer::out_size, Layer::out_size)); cVec = cVec.array().tanh(); /** @@ -78,11 +75,9 @@ class GRULayer : public Layer * = c - z.cwiseProduct(c) + z.cwiseProduct(ht(t-1)) * = c + z.cwiseProduct(h(t-1) - c) */ - extendedHt1.segment(0, Layer::out_size) = - cVec + gammaVec.segment(0, Layer::out_size).cwiseProduct( - extendedHt1.segment(0, Layer::out_size) - cVec); + extendedHt1.segment(0, Layer::out_size) = cVec + gammaVec.segment(0, Layer::out_size).cwiseProduct(extendedHt1.segment(0, Layer::out_size) - cVec); - for (int i = 0; i < Layer::out_size; ++i) + for(int i = 0; i < Layer::out_size; ++i) { h[i] = extendedHt1(i); } @@ -145,7 +140,6 @@ class GRULayer : public Layer Eigen::Matrix betaVec; Eigen::Matrix gammaVec; Eigen::Matrix cVec; - }; //==================================================== @@ -199,7 +193,7 @@ class GRULayerT /** Performs forward propagation for this layer. */ inline void forward(const in_type& ins) noexcept { - for (int i = 0; i < in_sizet; ++i) + for(int i = 0; i < in_sizet; ++i) { extendedInVec(i) = ins(i); } @@ -220,16 +214,13 @@ class GRULayerT * gamma = sigmoid( | z | = sigmoid(alpha[0 : 2*out_sizet] + beta[0 : 2*out_sizet]) * | r | ) */ - gammaVec = sigmoid(alphaVec.segment(0, 2 * out_sizet) + - betaVec.segment(0, 2 * out_sizet)); + gammaVec = sigmoid(alphaVec.segment(0, 2 * out_sizet) + betaVec.segment(0, 2 * out_sizet)); /** * c = tanh( alpha[2*out_sizet : 3*out_sizet] + r.cwiseProduct(beta[2*out_sizet : 3*out_sizet] ) * i.e. 
c = tanh( Wc * input + bc[0] + r.cwiseProduct(Uc * h(t-1) + bc[1]) ) */ - cVec.noalias() = alphaVec.segment(2 * out_sizet, out_sizet) + - gammaVec.segment(out_sizet, out_sizet).cwiseProduct( - betaVec.segment(2 * out_sizet, out_sizet)); + cVec.noalias() = alphaVec.segment(2 * out_sizet, out_sizet) + gammaVec.segment(out_sizet, out_sizet).cwiseProduct(betaVec.segment(2 * out_sizet, out_sizet)); cVec = cVec.array().tanh(); /** @@ -237,9 +228,7 @@ class GRULayerT * = c - z.cwiseProduct(c) + z.cwiseProduct(ht(t-1)) * = c + z.cwiseProduct(h(t-1) - c) */ - extendedHt1.segment(0, out_sizet) = - cVec + gammaVec.segment(0, out_sizet).cwiseProduct( - extendedHt1.segment(0, out_sizet) - cVec); + extendedHt1.segment(0, out_sizet) = cVec + gammaVec.segment(0, out_sizet).cwiseProduct(extendedHt1.segment(0, out_sizet) - cVec); computeOutput(); } @@ -274,7 +263,7 @@ class GRULayerT inline std::enable_if_t computeOutput() noexcept { - for (int i = 0; i < out_sizet; ++i) + for(int i = 0; i < out_sizet; ++i) { outs(i) = extendedHt1(i); } @@ -284,14 +273,14 @@ class GRULayerT inline std::enable_if_t computeOutput() noexcept { - for (int i = 0; i < out_sizet; ++i) + for(int i = 0; i < out_sizet; ++i) { outs_delayed[delayWriteIdx][i] = extendedHt1(i); } processDelay(outs_delayed, outs, delayWriteIdx); - for (int i = 0; i < out_sizet; ++i) + for(int i = 0; i < out_sizet; ++i) { extendedHt1(i) = outs(i); } diff --git a/RTNeural/gru/gru_eigen.tpp b/RTNeural/gru/gru_eigen.tpp index fc09ecfa..f40f0c3a 100644 --- a/RTNeural/gru/gru_eigen.tpp +++ b/RTNeural/gru/gru_eigen.tpp @@ -11,15 +11,15 @@ GRULayer::GRULayer(int in_size, int out_size) { wCombinedWeights = Eigen::Matrix::Zero(3 * out_size, in_size + 1); uCombinedWeights = Eigen::Matrix::Zero(3 * out_size, out_size + 1); - extendedInVec = Eigen::Matrix::Zero(in_size + 1); - extendedHt1 = Eigen::Matrix::Zero(out_size + 1); + extendedInVec = Eigen::Matrix::Zero(in_size + 1); + extendedHt1 = Eigen::Matrix::Zero(out_size + 1); extendedInVec(Layer::in_size) = (T)1; extendedHt1(Layer::out_size) = (T)1; alphaVec = Eigen::Matrix::Zero(3 * out_size); - betaVec = Eigen::Matrix::Zero(3 * out_size); + betaVec = Eigen::Matrix::Zero(3 * out_size); gammaVec = Eigen::Matrix::Zero(2 * out_size); - cVec = Eigen::Matrix::Zero(out_size); + cVec = Eigen::Matrix::Zero(out_size); } template @@ -124,7 +124,7 @@ template T GRULayer::getBVal(int i, int k) const noexcept { T val; - if (i == 0) + if(i == 0) { val = wCombinedWeights[k][Layer::in_size]; } diff --git a/RTNeural/lstm/lstm.h b/RTNeural/lstm/lstm.h index f312ca18..3dea391e 100644 --- a/RTNeural/lstm/lstm.h +++ b/RTNeural/lstm/lstm.h @@ -7,9 +7,6 @@ #elif RTNEURAL_USE_XSIMD #include "lstm_xsimd.h" #include "lstm_xsimd.tpp" -#elif RTNEURAL_USE_ACCELERATE -#include "lstm_accelerate.h" -#include "lstm_accelerate.tpp" #else #include "../Layer.h" #include "../common.h" diff --git a/RTNeural/lstm/lstm.tpp b/RTNeural/lstm/lstm.tpp index de0cc3b7..0c1d2473 100644 --- a/RTNeural/lstm/lstm.tpp +++ b/RTNeural/lstm/lstm.tpp @@ -3,7 +3,7 @@ namespace RTNeural { -#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD && !RTNEURAL_USE_ACCELERATE +#if !RTNEURAL_USE_EIGEN && !RTNEURAL_USE_XSIMD template LSTMLayer::LSTMLayer(int in_size, int out_size) diff --git a/RTNeural/lstm/lstm_accelerate.h b/RTNeural/lstm/lstm_accelerate.h deleted file mode 100644 index 800d3d61..00000000 --- a/RTNeural/lstm/lstm_accelerate.h +++ /dev/null @@ -1,175 +0,0 @@ -#ifndef LSTMACCELERATE_H_INCLUDED -#define LSTMACCELERATE_H_INCLUDED - -#include "../common.h" - -namespace 
RTNeural -{ - -/** Dynamic implementation of a LSTM layer. */ -template -class LSTMLayer : public Layer -{ -public: - /** Constructs a LSTM layer for a given input and output size. */ - LSTMLayer(int in_size, int out_size); - LSTMLayer(std::initializer_list sizes); - LSTMLayer(const LSTMLayer& other); - LSTMLayer& operator=(const LSTMLayer& other); - virtual ~LSTMLayer(); - - /** Resets the state of the LSTM. */ - void reset() override; - - /** Returns the name of this layer. */ - std::string getName() const noexcept override { return "lstm"; } - - /** Performs forward propagation for this layer. */ - virtual inline void forward(const T* input, T* h) noexcept override - { - forward_internal(input, h); - } - - /** Sets the layer kernel weights. */ - void setWVals(const std::vector>& wVals); - - /** Sets the layer recurrent weights. */ - void setUVals(const std::vector>& uVals); - - /** Sets the layer biases. */ - void setBVals(const std::vector& bVals); - -protected: - template - inline typename std::enable_if::value>::type - forward_internal(const float* input, float* h) noexcept - { - float dotpr_out; - for(int i = 0; i < Layer::out_size; ++i) - { - vDSP_dotpr(fWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - fVec[i] = dotpr_out; - vDSP_dotpr(fWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - fVec[i] += dotpr_out; - - vDSP_dotpr(iWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - iVec[i] = dotpr_out; - vDSP_dotpr(iWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - iVec[i] += dotpr_out; - - vDSP_dotpr(oWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - oVec[i] = dotpr_out; - vDSP_dotpr(oWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - oVec[i] += dotpr_out; - - vDSP_dotpr(cWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - ctVec[i] = dotpr_out; - vDSP_dotpr(cWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - ctVec[i] += dotpr_out; - } - - vDSP_vadd(fVec, 1, fWeights.b, 1, fVec, 1, Layer::out_size); - sigmoid(fVec, fVec, Layer::out_size); - - vDSP_vadd(iVec, 1, iWeights.b, 1, iVec, 1, Layer::out_size); - sigmoid(iVec, iVec, Layer::out_size); - - vDSP_vadd(oVec, 1, oWeights.b, 1, oVec, 1, Layer::out_size); - sigmoid(oVec, oVec, Layer::out_size); - - vDSP_vadd(ctVec, 1, cWeights.b, 1, ctVec, 1, Layer::out_size); - const auto dim_int = static_cast(Layer::out_size); - vvtanhf(ctVec, ctVec, &dim_int); - - vDSP_vmul(fVec, 1, ct1, 1, cVec, 1, Layer::out_size); - vDSP_vmul(iVec, 1, ctVec, 1, ht1, 1, Layer::out_size); - vDSP_vadd(cVec, 1, ht1, 1, cVec, 1, Layer::out_size); - - vvtanhf(h, cVec, &dim_int); - vDSP_vmul(h, 1, oVec, 1, h, 1, Layer::out_size); - - cblas_scopy(Layer::out_size, cVec, 1, ct1, 1); - cblas_scopy(Layer::out_size, h, 1, ht1, 1); - } - - template - inline typename std::enable_if::value>::type - forward_internal(const double* input, double* h) noexcept - { - double dotpr_out; - for(int i = 0; i < Layer::out_size; ++i) - { - vDSP_dotprD(fWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - fVec[i] = dotpr_out; - vDSP_dotprD(fWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - fVec[i] += dotpr_out; - - vDSP_dotprD(iWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - iVec[i] = dotpr_out; - vDSP_dotprD(iWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - iVec[i] += dotpr_out; - - vDSP_dotprD(oWeights.W[i], 1, input, 1, &dotpr_out, Layer::in_size); - oVec[i] = dotpr_out; - vDSP_dotprD(oWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - oVec[i] += dotpr_out; - - vDSP_dotprD(cWeights.W[i], 1, input, 1, 
&dotpr_out, Layer::in_size); - ctVec[i] = dotpr_out; - vDSP_dotprD(cWeights.U[i], 1, ht1, 1, &dotpr_out, Layer::out_size); - ctVec[i] += dotpr_out; - } - - vDSP_vaddD(fVec, 1, fWeights.b, 1, fVec, 1, Layer::out_size); - sigmoid(fVec, fVec, Layer::out_size); - - vDSP_vaddD(iVec, 1, iWeights.b, 1, iVec, 1, Layer::out_size); - sigmoid(iVec, iVec, Layer::out_size); - - vDSP_vaddD(oVec, 1, oWeights.b, 1, oVec, 1, Layer::out_size); - sigmoid(oVec, oVec, Layer::out_size); - - vDSP_vaddD(ctVec, 1, cWeights.b, 1, ctVec, 1, Layer::out_size); - const auto dim_int = static_cast(Layer::out_size); - vvtanh(ctVec, ctVec, &dim_int); - - vDSP_vmulD(fVec, 1, ct1, 1, cVec, 1, Layer::out_size); - vDSP_vmulD(iVec, 1, ctVec, 1, ht1, 1, Layer::out_size); - vDSP_vaddD(cVec, 1, ht1, 1, cVec, 1, Layer::out_size); - - vvtanh(h, cVec, &dim_int); - vDSP_vmulD(h, 1, oVec, 1, h, 1, Layer::out_size); - - cblas_dcopy((int)Layer::out_size, cVec, 1, ct1, 1); - cblas_dcopy((int)Layer::out_size, h, 1, ht1, 1); - } - - T* ht1; - T* ct1; - - struct WeightSet - { - WeightSet(int in_size, int out_size); - ~WeightSet(); - - T** W; - T** U; - T* b; - const int out_size; - }; - - WeightSet fWeights; - WeightSet iWeights; - WeightSet oWeights; - WeightSet cWeights; - - T* fVec; - T* iVec; - T* oVec; - T* ctVec; - T* cVec; -}; - -} // namespace RTNeural - -#endif // LSTMACCELERATE_H_INCLUDED diff --git a/RTNeural/lstm/lstm_accelerate.tpp b/RTNeural/lstm/lstm_accelerate.tpp deleted file mode 100644 index 12afdfe9..00000000 --- a/RTNeural/lstm/lstm_accelerate.tpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "lstm_accelerate.h" - -namespace RTNeural -{ - -template -LSTMLayer::LSTMLayer(int in_size, int out_size) - : Layer(in_size, out_size) - , fWeights(in_size, out_size) - , iWeights(in_size, out_size) - , oWeights(in_size, out_size) - , cWeights(in_size, out_size) -{ - ht1 = new T[out_size]; - ct1 = new T[out_size]; - - fVec = new T[out_size]; - iVec = new T[out_size]; - oVec = new T[out_size]; - ctVec = new T[out_size]; - cVec = new T[out_size]; -} - -template -LSTMLayer::LSTMLayer(std::initializer_list sizes) - : LSTMLayer(*sizes.begin(), *(sizes.begin() + 1)) -{ -} - -template -LSTMLayer::LSTMLayer(const LSTMLayer& other) - : LSTMLayer(other.in_size, other.out_size) -{ -} - -template -LSTMLayer& LSTMLayer::operator=(const LSTMLayer& other) -{ - return *this = LSTMLayer(other); -} - -template -LSTMLayer::~LSTMLayer() -{ - delete[] ht1; - delete[] ct1; - - delete[] fVec; - delete[] iVec; - delete[] oVec; - delete[] ctVec; - delete[] cVec; -} - -template -void LSTMLayer::reset() -{ - std::fill(ht1, ht1 + Layer::out_size, (T)0); - std::fill(ct1, ct1 + Layer::out_size, (T)0); -} - -template -LSTMLayer::WeightSet::WeightSet(int in_size, int out_size) - : out_size(out_size) -{ - W = new T*[out_size]; - U = new T*[out_size]; - b = new T[out_size]; - - for(int i = 0; i < out_size; ++i) - { - W[i] = new T[in_size]; - U[i] = new T[out_size]; - } -} - -template -LSTMLayer::WeightSet::~WeightSet() -{ - delete[] b; - - for(int i = 0; i < out_size; ++i) - { - delete[] W[i]; - delete[] U[i]; - } - - delete[] W; - delete[] U; -} - -template -void LSTMLayer::setWVals(const std::vector>& wVals) -{ - for(int i = 0; i < Layer::in_size; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - iWeights.W[k][i] = wVals[i][k]; - fWeights.W[k][i] = wVals[i][k + Layer::out_size]; - cWeights.W[k][i] = wVals[i][k + Layer::out_size * 2]; - oWeights.W[k][i] = wVals[i][k + Layer::out_size * 3]; - } - } -} - -template -void LSTMLayer::setUVals(const std::vector>& 
uVals) -{ - for(int i = 0; i < Layer::out_size; ++i) - { - for(int k = 0; k < Layer::out_size; ++k) - { - iWeights.U[k][i] = uVals[i][k]; - fWeights.U[k][i] = uVals[i][k + Layer::out_size]; - cWeights.U[k][i] = uVals[i][k + Layer::out_size * 2]; - oWeights.U[k][i] = uVals[i][k + Layer::out_size * 3]; - } - } -} - -template -void LSTMLayer::setBVals(const std::vector& bVals) -{ - for(int k = 0; k < Layer::out_size; ++k) - { - iWeights.b[k] = bVals[k]; - fWeights.b[k] = bVals[k + Layer::out_size]; - cWeights.b[k] = bVals[k + Layer::out_size * 2]; - oWeights.b[k] = bVals[k + Layer::out_size * 3]; - } -} - -} // namespace RTNeural diff --git a/RTNeural/lstm/lstm_eigen.h b/RTNeural/lstm/lstm_eigen.h index 186afb16..c14f52ea 100644 --- a/RTNeural/lstm/lstm_eigen.h +++ b/RTNeural/lstm/lstm_eigen.h @@ -48,21 +48,19 @@ class LSTMLayer : public Layer */ fioctVecs.noalias() = combinedWeights * extendedInVecHt1; - fioVecs = fioctVecs.segment(0, Layer::out_size * 3); ctVec = fioctVecs.segment(Layer::out_size * 3, Layer::out_size) - .array().tanh(); + .array() + .tanh(); sigmoid(fioVecs); - ct1 = fioVecs.segment(0, Layer::out_size).cwiseProduct(ct1) + - fioVecs.segment(Layer::out_size, Layer::out_size) - .cwiseProduct(ctVec); + ct1 = fioVecs.segment(0, Layer::out_size).cwiseProduct(ct1) + fioVecs.segment(Layer::out_size, Layer::out_size).cwiseProduct(ctVec); cTanhVec = ct1.array().tanh(); ht1 = fioVecs.segment(Layer::out_size * 2, Layer::out_size).cwiseProduct(cTanhVec); - for (int i = 0; i < Layer::out_size; ++i) + for(int i = 0; i < Layer::out_size; ++i) { h[i] = extendedInVecHt1(Layer::in_size + i) = ht1(i); } @@ -117,11 +115,11 @@ template ; - using extended_in_out_type = Eigen::Matrix; - using four_out_type = Eigen::Matrix; - using three_out_type = Eigen::Matrix; + using extended_in_out_type = Eigen::Matrix; + using four_out_type = Eigen::Matrix; + using three_out_type = Eigen::Matrix; - using in_type = Eigen::Matrix; + using in_type = Eigen::Matrix; using out_type = Eigen::Matrix; public: @@ -152,7 +150,7 @@ class LSTMLayerT /** Performs forward propagation for this layer. 
*/ inline void forward(const in_type& ins) noexcept { - for (int i = 0; i < in_sizet; ++i) + for(int i = 0; i < in_sizet; ++i) { extendedInHt1Vec(i) = ins(i); } @@ -203,7 +201,7 @@ class LSTMLayerT { computeOutputsInternal(cVec, outs); - for (int i = 0; i < out_sizet; ++i) + for(int i = 0; i < out_sizet; ++i) { extendedInHt1Vec(in_sizet + i) = outs(i); } @@ -218,7 +216,7 @@ class LSTMLayerT processDelay(ct_delayed, cVec, delayWriteIdx); processDelay(outs_delayed, outs, delayWriteIdx); - for (int i = 0; i < out_sizet; ++i) + for(int i = 0; i < out_sizet; ++i) { extendedInHt1Vec(in_sizet + i) = outs(i); } @@ -229,9 +227,9 @@ class LSTMLayerT { cVecLocal.noalias() = fioVecs.segment(0, out_sizet) - .cwiseProduct(cVec) + .cwiseProduct(cVec) + fioVecs.segment(out_sizet, out_sizet) - .cwiseProduct(ctVec); + .cwiseProduct(ctVec); cTanhVec = cVecLocal.array().tanh(); outsVec.noalias() = fioVecs.segment(out_sizet * 2, out_sizet).cwiseProduct(cTanhVec); @@ -265,10 +263,10 @@ class LSTMLayerT // kernel weights weights_combined_type combinedWeights; - extended_in_out_type extendedInHt1Vec; - four_out_type fioctsVecs; - three_out_type fioVecs; - out_type cTanhVec; + extended_in_out_type extendedInHt1Vec; + four_out_type fioctsVecs; + three_out_type fioVecs; + out_type cTanhVec; // intermediate values out_type ctVec; diff --git a/RTNeural/lstm/lstm_eigen.tpp b/RTNeural/lstm/lstm_eigen.tpp index c09a82cb..66f974bc 100644 --- a/RTNeural/lstm/lstm_eigen.tpp +++ b/RTNeural/lstm/lstm_eigen.tpp @@ -7,13 +7,13 @@ template LSTMLayer::LSTMLayer(int in_size, int out_size) : Layer(in_size, out_size) { - combinedWeights = Eigen::Matrix::Zero(4 * out_size, in_size + out_size + 1); + combinedWeights = Eigen::Matrix::Zero(4 * out_size, in_size + out_size + 1); extendedInVecHt1 = Eigen::Matrix::Zero(in_size + out_size + 1); extendedInVecHt1(in_size + out_size) = (T)1; fioctVecs = Eigen::Matrix::Zero(4 * out_size); - fioVecs = Eigen::Matrix::Zero(3 * out_size); - ctVec = Eigen::Matrix::Zero(out_size); + fioVecs = Eigen::Matrix::Zero(3 * out_size); + ctVec = Eigen::Matrix::Zero(out_size); cTanhVec = Eigen::Matrix::Zero(out_size, 1); @@ -98,13 +98,13 @@ template ::LSTMLayerT() : outs(outs_internal) { - combinedWeights = weights_combined_type::Zero(); + combinedWeights = weights_combined_type::Zero(); extendedInHt1Vec = extended_in_out_type::Zero(); - fioctsVecs = four_out_type::Zero(); - fioVecs = three_out_type::Zero(); + fioctsVecs = four_out_type::Zero(); + fioVecs = three_out_type::Zero(); - ctVec = out_type::Zero(); - cTanhVec = out_type::Zero(); + ctVec = out_type::Zero(); + cTanhVec = out_type::Zero(); reset(); } diff --git a/cmake/ChooseBackend.cmake b/cmake/ChooseBackend.cmake index 28fb64bb..39e54caa 100644 --- a/cmake/ChooseBackend.cmake +++ b/cmake/ChooseBackend.cmake @@ -1,6 +1,5 @@ option(RTNEURAL_EIGEN "Use Eigen library for vector operations" OFF) option(RTNEURAL_XSIMD "Use xsimd library for vector operations" OFF) -option(RTNEURAL_ACCELERATE "Use Accelerate library for vector operations (Apple only)" OFF) option(RTNEURAL_STL "Use STL for all operations" OFF) if(RTNEURAL_EIGEN) message(STATUS "RTNeural -- Using Eigen backend") @@ -10,13 +9,6 @@ elseif(RTNEURAL_XSIMD) message(STATUS "RTNeural -- Using xsimd backend") target_compile_definitions(RTNeural PUBLIC RTNEURAL_USE_XSIMD=1) target_include_directories(RTNeural PUBLIC modules/xsimd/include) -elseif(RTNEURAL_ACCELERATE) - if(NOT APPLE) - message(FATAL_ERROR "RTNeural -- Accelerate is only supported on Apple platforms!") - endif() - message(STATUS 
"RTNeural -- Using Accelerate backend") - target_compile_definitions(RTNeural PUBLIC RTNEURAL_USE_ACCELERATE=1) - target_link_libraries(RTNeural PUBLIC "-framework Accelerate") elseif(RTNEURAL_STL) message(STATUS "RTNeural -- Using STL backend") else() diff --git a/tests/conv2d_model.h b/tests/conv2d_model.h index 01ce6fa2..8796744b 100644 --- a/tests/conv2d_model.h +++ b/tests/conv2d_model.h @@ -140,7 +140,6 @@ int conv2d_test() std::cout << "SUCCESS NON TEMPLATED!" << std::endl << std::endl; -#if MODELT_AVAILABLE // templated model std::vector yDataT(num_frames * num_features_out, (TestType)0); { @@ -183,7 +182,6 @@ int conv2d_test() std::cout << "SUCCESS TEMPLATED!" << std::endl; -#endif // MODELT_AVAILABLE #endif // ! RTNEURAL_USE_AVX return 0; } diff --git a/tests/model_test.hpp b/tests/model_test.hpp index 4e27d5f4..4fa1f65a 100644 --- a/tests/model_test.hpp +++ b/tests/model_test.hpp @@ -39,7 +39,6 @@ int model_test() processModel(*modelRef.get(), xData, yRefData); } -#if MODELT_AVAILABLE // templated model std::vector yData(xData.size(), (TestType)0); { @@ -81,7 +80,6 @@ int model_test() std::cout << "Maximum error: " << max_error << std::endl; return 1; } -#endif std::cout << "SUCCESS" << std::endl; return 0; diff --git a/tests/templated_tests.hpp b/tests/templated_tests.hpp index 2fbc7f92..27ff88e8 100644 --- a/tests/templated_tests.hpp +++ b/tests/templated_tests.hpp @@ -61,7 +61,6 @@ int templatedTests(std::string arg) using namespace RTNeural; using TestType = double; -#if MODELT_AVAILABLE int result = 0; if(arg == "dense") @@ -135,8 +134,4 @@ int templatedTests(std::string arg) } return result; - -#else // @TODO - return 0; -#endif // MODELT_AVAILABLE }