Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AVX2 version of GetResidualCost and SSE2 version of SetCoeffs #1902

Merged
merged 7 commits into from
Dec 21, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 113 additions & 22 deletions src/ImageSharp/Formats/Webp/Lossy/Vp8Residual.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp.Formats.Webp.Lossy
{
Expand All @@ -11,6 +16,16 @@ namespace SixLabors.ImageSharp.Formats.Webp.Lossy
/// </summary>
internal class Vp8Residual
{
#if SUPPORTS_RUNTIME_INTRINSICS
// Vector with every byte set to 2. The AVX2 path of GetResidualCost takes the byte-wise minimum of the
// packed absolute coefficients and this vector to obtain the per-coefficient contexts.
private static readonly Vector256<byte> Cst2 = Vector256.Create((byte)2);

// Vector with every byte set to 67. The AVX2 path of GetResidualCost takes the byte-wise minimum of the
// packed absolute coefficients and this vector to clamp the levels to [0..67].
private static readonly Vector256<byte> Cst67 = Vector256.Create((byte)67);
#endif

// Reusable byte buffer for the AVX2 path of GetResidualCost: bytes 0..15 receive the contexts,
// bytes 16..31 receive the clamped levels.
private readonly byte[] scratch = new byte[32];

// Reusable buffer for the AVX2 path of GetResidualCost: receives the 16 absolute coefficient values as 16-bit values.
private readonly ushort[] scratchUShort = new ushort[16];

public int First { get; set; }

public int Last { get; set; }
Expand All @@ -37,14 +52,39 @@ public void Init(int first, int coeffType, Vp8EncProba prob)

public void SetCoeffs(Span<short> coeffs)
{
int n;
this.Last = -1;
for (n = 15; n >= 0; --n)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
ref short coeffsRef = ref MemoryMarshal.GetReference(coeffs);
Vector128<byte> c0 = Unsafe.As<short, Vector128<byte>>(ref coeffsRef);
Vector128<byte> c1 = Unsafe.As<short, Vector128<byte>>(ref Unsafe.Add(ref coeffsRef, 8));

// Use SSE2 to compare 16 values with a single instruction.
Vector128<sbyte> m0 = Sse2.PackSignedSaturate(c0.AsInt16(), c1.AsInt16());
Vector128<sbyte> m1 = Sse2.CompareEqual(m0, Vector128<sbyte>.Zero);

// Get the comparison results as a bitmask into 16bits. Negate the mask to get
// the position of entries that are not equal to zero. We don't need to mask
// out least significant bits according to res->first, since coeffs[0] is 0
// if res->first > 0.
uint mask = 0x0000ffffu ^ (uint)Sse2.MoveMask(m1);

// The position of the most significant non-zero bit indicates the position of
// the last non-zero value.
this.Last = mask != 0 ? Numerics.Log2(mask) : -1;
}
else
#endif
{
if (coeffs[n] != 0)
int n;
this.Last = -1;
for (n = 15; n >= 0; --n)
{
this.Last = n;
break;
if (coeffs[n] != 0)
{
this.Last = n;
break;
}
}
}

Expand Down Expand Up @@ -129,27 +169,78 @@ public int GetResidualCost(int ctx0)
return LossyUtils.Vp8BitCost(0, (byte)p0);
}

int v;
for (; n < this.Last; ++n)
#if SUPPORTS_RUNTIME_INTRINSICS
if (Avx2.IsSupported)
{
Span<byte> ctxs = this.scratch.AsSpan(0, 16);
Span<byte> levels = this.scratch.AsSpan(16, 16);
Span<ushort> absLevels = this.scratchUShort.AsSpan();

// Precompute clamped levels and contexts, packed to 8b.
ref short outputRef = ref MemoryMarshal.GetReference<short>(this.Coeffs);
Vector256<short> c0 = Unsafe.As<short, Vector256<byte>>(ref outputRef).AsInt16();
Vector256<short> d0 = Avx2.Subtract(Vector256<short>.Zero, c0);
Vector256<short> e0 = Avx2.Max(c0, d0); // abs(v), 16b
Vector256<sbyte> f = Avx2.PackSignedSaturate(e0, e0);
Vector256<byte> g = Avx2.Min(f.AsByte(), Cst2);
Vector256<byte> h = Avx2.Min(f.AsByte(), Cst67); // clampLevel in [0..67]

ref byte ctxsRef = ref MemoryMarshal.GetReference(ctxs);
ref byte levelsRef = ref MemoryMarshal.GetReference(levels);
ref ushort absLevelsRef = ref MemoryMarshal.GetReference(absLevels);
Unsafe.As<byte, Vector128<byte>>(ref ctxsRef) = g.GetLower();
Unsafe.As<byte, Vector128<byte>>(ref levelsRef) = h.GetLower();
Unsafe.As<ushort, Vector256<ushort>>(ref absLevelsRef) = e0.AsUInt16();

int level;
int flevel;
for (; n < this.Last; ++n)
{
int ctx = ctxs[n];
level = levels[n];
flevel = absLevels[n];
cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level];
t = costs[n + 1].Costs[ctx];
}

// Last coefficient is always non-zero.
level = levels[n];
flevel = absLevels[n];
cost += WebpLookupTables.Vp8LevelFixedCosts[flevel] + t.Costs[level];
if (n < 15)
{
int b = WebpConstants.Vp8EncBands[n + 1];
int ctx = ctxs[n];
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0];
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0);
}

return cost;
}
#endif
{
int v;
for (; n < this.Last; ++n)
{
v = Math.Abs(this.Coeffs[n]);
int ctx = v >= 2 ? 2 : v;
cost += LevelCost(t.Costs, v);
t = costs[n + 1].Costs[ctx];
}

// Last coefficient is always non-zero
v = Math.Abs(this.Coeffs[n]);
int ctx = v >= 2 ? 2 : v;
cost += LevelCost(t.Costs, v);
t = costs[n + 1].Costs[ctx];
}
if (n < 15)
{
int b = WebpConstants.Vp8EncBands[n + 1];
int ctx = v == 1 ? 1 : 2;
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0];
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0);
}

// Last coefficient is always non-zero
v = Math.Abs(this.Coeffs[n]);
cost += LevelCost(t.Costs, v);
if (n < 15)
{
int b = WebpConstants.Vp8EncBands[n + 1];
int ctx = v == 1 ? 1 : 2;
int lastP0 = this.Prob[b].Probabilities[ctx].Probabilities[0];
cost += LossyUtils.Vp8BitCost(0, (byte)lastP0);
return cost;
}

return cost;
}

[MethodImpl(InliningOptions.ShortMethod)]
Expand Down
37 changes: 37 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/Vp8ResidualTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using SixLabors.ImageSharp.Formats.Webp.Lossy;
using SixLabors.ImageSharp.Tests.TestUtilities;
using Xunit;

namespace SixLabors.ImageSharp.Tests.Formats.WebP
{
[Trait("Format", "Webp")]
public class Vp8ResidualTests
{
    /// <summary>
    /// Feeds a fixed block of 16 coefficients to <see cref="Vp8Residual.SetCoeffs"/> and checks
    /// that <see cref="Vp8Residual.Last"/> ends up at the index of the last non-zero entry.
    /// </summary>
    private static void RunSetCoeffsTest()
    {
        // arrange: the last non-zero coefficient (-1) is at index 9.
        const int expectedLast = 9;
        Vp8Residual sut = new Vp8Residual();
        short[] input = { 110, 0, -2, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0 };

        // act
        sut.SetCoeffs(input);

        // assert
        Assert.Equal(expectedLast, sut.Last);
    }

    /// <summary>
    /// Runs the SetCoeffs check in the current process with whatever instruction sets are available.
    /// </summary>
    [Fact]
    public void RunSetCoeffsTest_Works()
    {
        RunSetCoeffsTest();
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    /// <summary>
    /// Runs the SetCoeffs check with all hardware intrinsics allowed.
    /// </summary>
    [Fact]
    public void RunSetCoeffsTest_WithHardwareIntrinsics_Works()
    {
        FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSetCoeffsTest, HwIntrinsics.AllowAll);
    }

    /// <summary>
    /// Runs the SetCoeffs check with hardware intrinsics disabled.
    /// </summary>
    [Fact]
    public void RunSetCoeffsTest_WithoutHardwareIntrinsics_Works()
    {
        FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSetCoeffsTest, HwIntrinsics.DisableHWIntrinsic);
    }
#endif
}
}
20 changes: 20 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/WebpEncoderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using SixLabors.ImageSharp.Formats.Webp;
using SixLabors.ImageSharp.Metadata;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Tests.TestUtilities;
using SixLabors.ImageSharp.Tests.TestUtilities.ImageComparison;
using Xunit;
using static SixLabors.ImageSharp.Tests.TestImages.Webp;
Expand All @@ -14,6 +15,8 @@ namespace SixLabors.ImageSharp.Tests.Formats.Webp
[Trait("Format", "Webp")]
public class WebpEncoderTests
{
// Full path of the lossy test image (Lossy.NoFilter06) inside the input images directory.
private static string TestImageLossyFullPath
{
    get
    {
        return Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.NoFilter06);
    }
}

[Theory]
[WithFile(Flag, PixelTypes.Rgba32, WebpFileFormatType.Lossless)] // if its not a webp input image, it should default to lossless.
[WithFile(Lossless.NoTransform1, PixelTypes.Rgba32, WebpFileFormatType.Lossless)]
Expand Down Expand Up @@ -288,6 +291,23 @@ public void Encode_Lossy_WorksWithTestPattern<TPixel>(TestImageProvider<TPixel>
image.VerifyEncoder(provider, "webp", string.Empty, encoder, ImageComparer.Tolerant(0.04f));
}

// Encodes the lossy test image with the lossy webp encoder and verifies the result
// against the reference output using a tolerant comparison (threshold 0.04).
public static void RunEncodeLossy_WithPeakImage()
{
    var imageProvider = TestImageProvider<Rgba32>.File(TestImageLossyFullPath);

    using (Image<Rgba32> source = imageProvider.GetImage())
    {
        var lossyEncoder = new WebpEncoder
        {
            FileFormat = WebpFileFormatType.Lossy
        };
        ImageComparer comparer = ImageComparer.Tolerant(0.04f);

        source.VerifyEncoder(imageProvider, "webp", string.Empty, lossyEncoder, comparer);
    }
}

#if SUPPORTS_RUNTIME_INTRINSICS
// Runs the lossy encoding check with all hardware intrinsics allowed.
[Fact]
public void RunEncodeLossy_WithPeakImage_WithHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunEncodeLossy_WithPeakImage, HwIntrinsics.AllowAll);
}
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: I could not figure out how to write a focused unit test for GetResidualCost; it would be rather complicated. Instead, I have decided to run a lossy encoding test with and without hardware intrinsics.


// Runs the lossy encoding check with hardware intrinsics disabled.
[Fact]
public void RunEncodeLossy_WithPeakImage_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(RunEncodeLossy_WithPeakImage, HwIntrinsics.DisableHWIntrinsic);
}
#endif

private static ImageComparer GetComparer(int quality)
{
float tolerance = 0.01f; // ~1.0%
Expand Down