Cache last channel layout when calling AudioFile.write. (#338)

* Support writing (1, 1)-shaped and (2, 2)-shaped buffers to files.
* Add docs.
spotify · Jun 18, 2024 · 67511bf · 67511bf
1 parent e351860
commit 67511bf
Show file tree

Hide file tree

Showing 6 changed files with 184 additions and 20 deletions.
diff --git a/docs/reference/pedalboard.io.html b/docs/reference/pedalboard.io.html
@@ -981,6 +981,29 @@ <h1>The <code class="docutils literal notranslate"><span class="pre">pedalboard.
 <span class="sig-name descname"><span class="pre">write</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">samples</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">ndarray</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.12)"><span class="pre">None</span></a></span></span><a class="headerlink" href="#pedalboard.io.WriteableAudioFile.write" title="Permalink to this definition">#</a></dt>
 <dd><p>Encode an array of audio data and write it to this file. The number of channels in the array must match the number of channels used to open the file. The array may contain audio in any shape. If the file’s bit depth or format does not match the provided data type, the audio will be automatically converted.</p>
 <p>Arrays of type int8, int16, int32, float32, and float64 are supported. If an array of an unsupported <code class="docutils literal notranslate"><span class="pre">dtype</span></code> is provided, a <code class="docutils literal notranslate"><span class="pre">TypeError</span></code> will be raised.</p>
+<div class="admonition warning">
+<p class="admonition-title">Warning</p>
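+<p>If an array of shape <code class="docutils literal notranslate"><span class="pre">(num_channels,</span> <span class="pre">num_channels)</span></code> with more than one channel is passed to this method before any other audio data is provided, an exception will be thrown, as the method will not be able to infer which dimension of the input corresponds to the number of channels and which dimension corresponds to the number of samples. (A single-sample, single-channel array of shape <code class="docutils literal notranslate"><span class="pre">(1,</span> <span class="pre">1)</span></code> is unambiguous and is always accepted.)</p>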
+<p>To avoid this, first call this method with an array where the number of samples does not match the number of channels.</p>
+<p>The channel layout from the most recently provided input will be cached on the <code class="xref py py-class docutils literal notranslate"><span class="pre">WriteableAudioFile</span></code> object and will be used if necessary to disambiguate the array layout:</p>
+<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="k">with</span> <span class="n">AudioFile</span><span class="p">(</span><span class="s2">&quot;my_file.mp3&quot;</span><span class="p">,</span> <span class="s2">&quot;w&quot;</span><span class="p">,</span> <span class="mi">44100</span><span class="p">,</span> <span class="n">num_channels</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
+    <span class="c1"># This will throw an exception:</span>
+    <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">)))</span>
+    <span class="c1"># But this will work:</span>
+    <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span> <span class="mi">1</span><span class="p">)))</span>
+    <span class="c1"># And now `f` expects an input shape of (num_channels, num_samples), so this works:</span>
+    <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">)))</span>
+
+<span class="c1"># Also an option: pass (0, num_channels) or (num_channels, 0) first</span>
+<span class="c1"># to hint that the input will be in that shape without writing anything:</span>
+<span class="k">with</span> <span class="n">AudioFile</span><span class="p">(</span><span class="s2">&quot;my_file.mp3&quot;</span><span class="p">,</span> <span class="s2">&quot;w&quot;</span><span class="p">,</span> <span class="mi">44100</span><span class="p">,</span> <span class="n">num_channels</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span> <span class="k">as</span> <span class="n">f</span><span class="p">:</span>
+    <span class="c1"># Pass a hint, but write nothing:</span>
+    <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span> <span class="mi">0</span><span class="p">)))</span>
+    <span class="c1"># And now `f` expects an input shape of (num_channels, num_samples), so this works:</span>
+    <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">zeros</span><span class="p">((</span><span class="mi">2</span><span class="p">,</span> <span class="mi">2</span><span class="p">)))</span>
+</pre></div>
+</div>
+</div>
 </dd></dl>
 
 <dl class="py property">

diff --git a/docs/searchindex.js b/docs/searchindex.js
diff --git a/pedalboard/BufferUtils.h b/pedalboard/BufferUtils.h
@@ -29,20 +29,44 @@ enum class ChannelLayout {
 
 template <typename T>
 ChannelLayout
-detectChannelLayout(const py::array_t<T, py::array::c_style> inputArray) {
+detectChannelLayout(const py::array_t<T, py::array::c_style> inputArray,
+                    std::optional<int> channelCountHint = {}) {
   py::buffer_info inputInfo = inputArray.request();
 
   if (inputInfo.ndim == 1) {
     return ChannelLayout::NotInterleaved;
   } else if (inputInfo.ndim == 2) {
+    if (channelCountHint) {
+      if (inputInfo.shape[0] == inputInfo.shape[1] && inputInfo.shape[0] > 1) {
+        throw std::runtime_error(
+            "Unable to determine channel layout from shape: (" +
+            std::to_string(inputInfo.shape[0]) + ", " +
+            std::to_string(inputInfo.shape[1]) + ").");
+      } else if (inputInfo.shape[0] == *channelCountHint) {
+        return ChannelLayout::NotInterleaved;
+      } else if (inputInfo.shape[1] == *channelCountHint) {
+        return ChannelLayout::Interleaved;
+      } else {
+        throw std::runtime_error(
+            "Unable to determine channel layout from shape: (" +
+            std::to_string(inputInfo.shape[0]) + ", " +
+            std::to_string(inputInfo.shape[1]) + ").");
+      }
+    }
+
     // Try to auto-detect the channel layout from the shape
     if (inputInfo.shape[1] < inputInfo.shape[0]) {
       return ChannelLayout::Interleaved;
     } else if (inputInfo.shape[0] < inputInfo.shape[1]) {
       return ChannelLayout::NotInterleaved;
+    } else if (inputInfo.shape[0] == 1 || inputInfo.shape[1] == 1) {
+      // Do we only have one sample? Then the layout doesn't matter:
+      return ChannelLayout::NotInterleaved;
     } else {
       throw std::runtime_error(
-          "Unable to determine channel layout from shape!");
+          "Unable to determine channel layout from shape: (" +
+          std::to_string(inputInfo.shape[0]) + ", " +
+          std::to_string(inputInfo.shape[1]) + ").");
     }
   } else {
     throw std::runtime_error("Number of input dimensions must be 1 or 2 (got " +

diff --git a/pedalboard/io/WriteableAudioFile.h b/pedalboard/io/WriteableAudioFile.h
@@ -484,7 +484,25 @@ class WriteableAudioFile
 
     unsigned int numChannels = 0;
     unsigned int numSamples = 0;
-    ChannelLayout inputChannelLayout = detectChannelLayout(inputArray);
+
+    if (lastChannelLayout) {
+      try {
+        lastChannelLayout = detectChannelLayout(inputArray, {getNumChannels()});
+      } catch (...) {
+        // Use the last cached layout.
+      }
+    } else {
+      // We have no cached layout; detect it now and raise if necessary:
+      try {
+        lastChannelLayout = detectChannelLayout(inputArray, {getNumChannels()});
+      } catch (const std::exception &e) {
+        throw std::runtime_error(
+            std::string(e.what()) +
+            " Provide a non-square array first to allow Pedalboard to "
+            "determine which dimension corresponds with the number of channels "
+            "and which dimension corresponds with the number of samples.");
+      }
+    }
 
     // Release the GIL when we do the writing, after we
     // already have a reference to the input array:
@@ -494,24 +512,15 @@ class WriteableAudioFile
       numSamples = inputInfo.shape[0];
       numChannels = 1;
     } else if (inputInfo.ndim == 2) {
-      // Try to auto-detect the channel layout from the shape
-      if (inputInfo.shape[0] == getNumChannels() &&
-          inputInfo.shape[1] == getNumChannels()) {
-        throw std::runtime_error(
-            "Unable to determine shape of audio input! Both dimensions have "
-            "the same shape. Expected " +
-            std::to_string(getNumChannels()) +
-            "-channel audio, with one dimension larger than the other.");
-      } else if (inputInfo.shape[1] == getNumChannels()) {
+      switch (*lastChannelLayout) {
+      case ChannelLayout::Interleaved:
         numSamples = inputInfo.shape[0];
         numChannels = inputInfo.shape[1];
-      } else if (inputInfo.shape[0] == getNumChannels()) {
+        break;
+      case ChannelLayout::NotInterleaved:
         numSamples = inputInfo.shape[1];
         numChannels = inputInfo.shape[0];
-      } else {
-        throw std::runtime_error(
-            "Unable to determine shape of audio input! Expected " +
-            std::to_string(getNumChannels()) + "-channel audio.");
+        break;
       }
     } else {
       throw std::runtime_error(
@@ -534,7 +543,7 @@ class WriteableAudioFile
     // differently. This loop is duplicated here to move the if statement
     // outside of the tight loop, as we don't need to re-check that the input
     // channel is still the same on every iteration of the loop.
-    switch (inputChannelLayout) {
+    switch (*lastChannelLayout) {
     case ChannelLayout::Interleaved: {
       std::vector<std::vector<SampleType>> deinterleaveBuffers;
 
@@ -867,6 +876,7 @@ class WriteableAudioFile
   PythonOutputStream *unsafeOutputStream = nullptr;
   juce::ReadWriteLock objectLock;
   int framesWritten = 0;
+  std::optional<ChannelLayout> lastChannelLayout = {};
 };
 
 inline py::class_<WriteableAudioFile, AudioFile,
@@ -1019,7 +1029,42 @@ inline void init_writeable_audio_file(
           "converted.\n\n"
           "Arrays of type int8, int16, int32, float32, and float64 are "
           "supported. If an array of an unsupported ``dtype`` is provided, a "
-          "``TypeError`` will be raised.")
+          "``TypeError`` will be raised.\n\n"
+          ".. warning::\n    If an array of shape ``(num_channels, "
+          "num_channels)`` is passed to this method before any other audio "
+          "data is provided, an exception will be thrown, as the method will "
+          "not be able to infer which dimension of the input corresponds to "
+          "the number of channels and which dimension corresponds to the "
+          "number of samples.\n\n    To avoid this, first call this method "
+          "with an array where the number of samples does not match the "
+          "number of channels.\n\n    The channel layout from the most "
+          "recently "
+          "provided input will be cached on the :py:class:`WritableAudioFile` "
+          "object and will be used if necessary to disambiguate the array "
+          "layout:\n\n"
+          "    .. code-block:: python\n\n"
+          "        with AudioFile(\"my_file.mp3\", \"w\", 44100, "
+          "num_channels=2) as f:\n"
+          "            # This will throw an exception:\n"
+          "            f.write(np.zeros((2, 2)))  \n"
+          "            # But this will work:\n"
+          "            f.write(np.zeros((2, 1)))\n"
+          "            # And now `f` expects an input shape of (num_channels, "
+          "num_samples), so this works:\n"
+          "            f.write(np.zeros((2, 2)))  \n"
+          "\n"
+          "        # Also an option: pass (0, num_channels) or (num_channels, "
+          "0) first\n"
+          "        # to hint that the input will be in that shape "
+          "without writing anything:\n"
+          "        with AudioFile(\"my_file.mp3\", \"w\", 44100, "
+          "num_channels=2) as f:\n"
+          "            # Pass a hint, but write nothing:\n"
+          "            f.write(np.zeros((2, 0)))  \n"
+          "            # And now `f` expects an input shape of (num_channels, "
+          "num_samples), so this works:\n"
+          "            f.write(np.zeros((2, 2)))  \n"
+          "\n")
       .def("flush", &WriteableAudioFile::flush,
            "Attempt to flush this audio file's contents to disk. Not all "
            "formats support flushing, so this may throw a RuntimeError. (If "

diff --git a/pedalboard_native/io/__init__.pyi b/pedalboard_native/io/__init__.pyi
@@ -984,6 +984,31 @@ class WriteableAudioFile(AudioFile):
         Encode an array of audio data and write it to this file. The number of channels in the array must match the number of channels used to open the file. The array may contain audio in any shape. If the file's bit depth or format does not match the provided data type, the audio will be automatically converted.
 
         Arrays of type int8, int16, int32, float32, and float64 are supported. If an array of an unsupported ``dtype`` is provided, a ``TypeError`` will be raised.
+
+        .. warning::
+            If an array of shape ``(num_channels, num_channels)`` with more than one channel is passed to this method before any other audio data is provided, an exception will be thrown, as the method will not be able to infer which dimension of the input corresponds to the number of channels and which dimension corresponds to the number of samples. (A single-sample, single-channel array of shape ``(1, 1)`` is unambiguous and is always accepted.)
+
+            To avoid this, first call this method with an array where the number of samples does not match the number of channels.
+
+            The channel layout from the most recently provided input will be cached on the :py:class:`WriteableAudioFile` object and will be used if necessary to disambiguate the array layout:
+
+            .. code-block:: python
+
+                with AudioFile("my_file.mp3", "w", 44100, num_channels=2) as f:
+                    # This will throw an exception:
+                    f.write(np.zeros((2, 2)))
+                    # But this will work:
+                    f.write(np.zeros((2, 1)))
+                    # And now `f` expects an input shape of (num_channels, num_samples), so this works:
+                    f.write(np.zeros((2, 2)))
+
+                # Also an option: pass (0, num_channels) or (num_channels, 0) first
+                # to hint that the input will be in that shape without writing anything:
+                with AudioFile("my_file.mp3", "w", 44100, num_channels=2) as f:
+                    # Pass a hint, but write nothing:
+                    f.write(np.zeros((2, 0)))
+                    # And now `f` expects an input shape of (num_channels, num_samples), so this works:
+                    f.write(np.zeros((2, 2)))
         """
 
     @property

diff --git a/tests/test_io.py b/tests/test_io.py
@@ -1526,3 +1526,50 @@ def tell(self) -> int:
         with pedalboard.io.AudioFile(ILieAboutSeekability(), "w", 44100, 2) as f:
             f.write(cached_rand(2, 44100))
     assert "What's a seek?" in str(e)
+
+
+def test_write_differently_shaped_empty_buffers():
+    buf = io.BytesIO()
+    with pedalboard.io.AudioFile(buf, "w", 44100, 1, format="wav") as f:
+        f.write(np.zeros((0, 1), dtype=np.float32))
+        f.write(np.zeros((1, 0), dtype=np.float32))
+        f.write(np.zeros((0,), dtype=np.float32))
+        assert f.tell() == 0
+
+
+def test_write_one_by_one_buffer():
+    buf = io.BytesIO()
+    with pedalboard.io.AudioFile(buf, "w", 44100, 1, format="wav") as f:
+        # Writing a single sample at a time should work:
+        f.write(np.zeros((1, 1), dtype=np.float32))
+        # Writing that same sample as a flat 1D array should work too:
+        f.write(np.zeros((1,), dtype=np.float32))
+
+
+def test_write_two_by_two_buffer():
+    buf = io.BytesIO()
+    with pedalboard.io.AudioFile(buf, "w", 44100, 2, format="wav") as f:
+        # Writing a 2x2 buffer should not work right off the bat, as we
+        # can't tell which dimension is channels and which dimension is
+        # samples:
+        with pytest.raises(RuntimeError) as e:
+            f.write(np.zeros((2, 2), dtype=np.float32))
+        assert "Provide a non-square array first" in str(e)
+
+        # ...but if we write a non-square buffer, it should work:
+        f.write(np.zeros((2, 1), dtype=np.float32))
+        # ...and now square buffers are interpreted as having the same channel layout:
+        f.write(np.zeros((2, 2), dtype=np.float32))
+
+        assert f.tell() == 3
+
+
+def test_write_two_by_two_buffer_with_hint():
+    buf = io.BytesIO()
+    with pedalboard.io.AudioFile(buf, "w", 44100, 2, format="wav") as f:
+        # ...if we pass an empty array of the right shape, that shape hint should be saved:
+        f.write(np.zeros((2, 0), dtype=np.float32))
+        # ...and now square buffers are interpreted as having the same channel layout:
+        f.write(np.zeros((2, 2), dtype=np.float32))
+
+        assert f.tell() == 2