diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cec85a4..ec239f55 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file. ## [unreleased] +🐛 **fix:** use buffers for neuron selection in SelectNeurons1D ([#50](https://github.com/owkin/GrAIdient/pull/50))\ 🪜 **feat:** Softmax1D, DotProduct1D & Constant1D ([#49](https://github.com/owkin/GrAIdient/pull/49))\ 🪜 **feat:** remove activation from layer ([#47](https://github.com/owkin/GrAIdient/pull/47))\ 🎉 **refactor:** logo ([#46](https://github.com/owkin/GrAIdient/pull/46))\ diff --git a/Sources/GrAIdient/Layer1D/SelectNeurons1D.swift b/Sources/GrAIdient/Layer1D/SelectNeurons1D.swift index 44d09429..beb948f8 100644 --- a/Sources/GrAIdient/Layer1D/SelectNeurons1D.swift +++ b/Sources/GrAIdient/Layer1D/SelectNeurons1D.swift @@ -17,6 +17,17 @@ public class SelectNeurons1D: Layer1D /// List of coefficients to scale each selected neuron. let _coeffs: [Double] + /// + /// Indices of selected neurons. + /// Shape ~ (nbNeurons,). + /// + var _neuronsBuffer: MetalPrivateBuffer<UInt32>! = nil + /// + /// Coefficients of selected neurons. + /// Shape ~ (nbNeurons,). + /// + var _coeffsBuffer: MetalPrivateBuffer<Float>! = nil + private enum Keys: String, CodingKey { case neurons @@ -110,6 +121,50 @@ public class SelectNeurons1D: Layer1D return layer } + /// + /// Clean state resources in the GPU execution context. + /// + /// We clean the neurons' state (forward and backward) and release the selection buffers. + /// + public override func resetKernelGPU() + { + super.resetKernelGPU() + _neuronsBuffer = nil + _coeffsBuffer = nil + } + + /// + /// Initialize state resources in the GPU execution context. + /// + /// We initialize the neurons' forward state and, when not yet allocated, fill and upload the selection buffers. 
+ /// + public override func checkStateForwardGPU(batchSize: Int) throws + { + try super.checkStateForwardGPU(batchSize: batchSize) + + if _neuronsBuffer == nil + { + _neuronsBuffer = MetalPrivateBuffer<UInt32>( + nbNeurons, deviceID: deviceID + ) + _coeffsBuffer = MetalPrivateBuffer<Float>( + nbNeurons, deviceID: deviceID + ) + + let neuronsPtr = _neuronsBuffer.shared.buffer + let coeffsPtr = _coeffsBuffer.shared.buffer + + for (num, neuron) in _neurons.enumerated() + { + neuronsPtr[num] = UInt32(neuron) + coeffsPtr[num] = Float(_coeffs[num]) + } + + MetalKernel.get.upload([_neuronsBuffer]) + MetalKernel.get.upload([_coeffsBuffer]) + } + } + /// /// Apply the forward pass of the Gradient Checking in CPU execution context. /// @@ -189,16 +244,6 @@ public class SelectNeurons1D: Layer1D let pNbNeurons: [UInt32] = [UInt32(nbNeurons)] let pNbNeuronsPrev: [UInt32] = [UInt32(layerPrev.nbNeurons)] let pNbBatch: [UInt32] = [UInt32(batchSize)] - var pNeurons = [UInt32]() - for neuron in _neurons - { - pNeurons.append(UInt32(neuron)) - } - var pCoeffs = [Float]() - for coeff in _coeffs - { - pCoeffs.append(Float(coeff)) - } let command = MetalKernel.get.createCommand( "selectNeurons1DForward", deviceID: deviceID @@ -206,8 +251,8 @@ public class SelectNeurons1D: Layer1D command.setBuffer(layerPrev.outs.metal, atIndex: 0) command.setBytes(pNbNeurons, atIndex: 1) command.setBytes(pNbNeuronsPrev, atIndex: 2) - command.setBytes(pNeurons, atIndex: 3) - command.setBytes(pCoeffs, atIndex: 4) + command.setBuffer(_neuronsBuffer.metal, atIndex: 3) + command.setBuffer(_coeffsBuffer.metal, atIndex: 4) command.setBytes(pNbBatch, atIndex: 5) command.setBuffer(outs.metal, atIndex: 6) @@ -260,19 +305,8 @@ public class SelectNeurons1D: Layer1D let pNbNeurons: [UInt32] = [UInt32(nbNeurons)] let pNbNeuronsPrev: [UInt32] = [UInt32(layerPrev.nbNeurons)] let pNbBatch: [UInt32] = [UInt32(batchSize)] - var pNeurons = [UInt32]() - for neuron in _neurons - { - pNeurons.append(UInt32(neuron)) - } - var pCoeffs = 
[Float]() - for coeff in _coeffs - { - pCoeffs.append(Float(coeff)) - } var command: MetalCommand - if layerPrev.dirty { let nbElems = layerPrev.delta.nbElems @@ -294,8 +328,8 @@ public class SelectNeurons1D: Layer1D command.setBuffer(delta.metal, atIndex: 0) command.setBytes(pNbNeurons, atIndex: 1) command.setBytes(pNbNeuronsPrev, atIndex: 2) - command.setBytes(pNeurons, atIndex: 3) - command.setBytes(pCoeffs, atIndex: 4) + command.setBuffer(_neuronsBuffer.metal, atIndex: 3) + command.setBuffer(_coeffsBuffer.metal, atIndex: 4) command.setBytes(pNbBatch, atIndex: 5) command.setBuffer(layerPrev.delta.metal, atIndex: 6)