Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SSE2 / AVX2 of webp AlphaDecoding #1930

Merged
merged 10 commits into from
Jan 18, 2022
82 changes: 73 additions & 9 deletions src/ImageSharp/Formats/Webp/AlphaDecoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
using SixLabors.ImageSharp.Formats.Webp.BitReader;
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Memory;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp.Formats.Webp
{
Expand Down Expand Up @@ -307,34 +311,94 @@ private static void ColorIndexInverseTransformAlpha(

/// <summary>
/// Reverses the horizontal filter for one row: every output byte is the input byte plus the
/// previously reconstructed byte of the same row, truncated to a byte.
/// </summary>
/// <param name="prev">The previous row. Only its first byte is read, as the seed for the first output byte; when empty the seed is 0.</param>
/// <param name="input">The filtered bytes of the current row.</param>
/// <param name="dst">Receives the reconstructed bytes of the current row.</param>
/// <param name="width">The number of bytes to process.</param>
private static void HorizontalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Sse2.IsSupported)
    {
        // The first byte is seeded from the previous row (or 0 when there is none).
        dst[0] = (byte)(input[0] + (prev.IsEmpty ? 0 : prev[0]));
        if (width <= 1)
        {
            return;
        }

        nint i;

        // 'last' carries the most recently reconstructed byte in its lowest byte lane.
        Vector128<int> last = Vector128<int>.Zero.WithElement(0, dst[0]);
        ref byte srcRef = ref MemoryMarshal.GetReference(input);
        ref byte dstRef = ref MemoryMarshal.GetReference(dst);
        for (i = 1; i + 8 <= width; i += 8)
        {
            // Load 8 input bytes into the lower half of the vector; the upper half is zero.
            var a0 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, i)), 0);

            // Add the carried byte to the first lane, then build the running sum of the
            // 8 bytes in three steps by adding copies shifted left by 1, 2 and 4 bytes.
            Vector128<byte> a1 = Sse2.Add(a0.AsByte(), last.AsByte());
            Vector128<byte> a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1);
            Vector128<byte> a3 = Sse2.Add(a1, a2);
            Vector128<byte> a4 = Sse2.ShiftLeftLogical128BitLane(a3, 2);
            Vector128<byte> a5 = Sse2.Add(a3, a4);
            Vector128<byte> a6 = Sse2.ShiftLeftLogical128BitLane(a5, 4);
            Vector128<byte> a7 = Sse2.Add(a5, a6);

            // Only the lower 8 bytes hold results for this step.
            Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref dstRef, i)) = a7.GetLower();

            // Move byte 7 (the last reconstructed value) down to byte 0 for the next step.
            last = Sse2.ShiftRightLogical(a7.AsInt64(), 56).AsInt32();
        }

        // Remaining bytes that do not fill a whole group of 8.
        for (; i < width; ++i)
        {
            dst[(int)i] = (byte)(input[(int)i] + dst[(int)i - 1]);
        }
    }
    else
#endif
    {
        byte pred = (byte)(prev.IsEmpty ? 0 : prev[0]);

        for (int i = 0; i < width; i++)
        {
            byte val = (byte)(pred + input[i]);
            pred = val;
            dst[i] = val;
        }
    }
}

/// <summary>
/// Reverses the vertical filter for one row: every output byte is the input byte plus the byte
/// at the same position in the previous row, truncated to a byte.
/// When there is no previous row the horizontal unfilter is used instead.
/// </summary>
/// <param name="prev">The previous row, or an empty span for the first row.</param>
/// <param name="input">The filtered bytes of the current row.</param>
/// <param name="dst">Receives the reconstructed bytes of the current row.</param>
/// <param name="width">The number of bytes to process.</param>
private static void VerticalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
    if (prev.IsEmpty)
    {
        HorizontalUnfilter(null, input, dst, width);
    }
    else
    {
#if SUPPORTS_RUNTIME_INTRINSICS
        if (Avx2.IsSupported)
        {
            nint i;

            // Largest multiple of 32 that is not greater than width.
            int maxPos = width & ~31;

            // The span references do not change inside the loop, so fetch them once.
            ref byte inputRef = ref MemoryMarshal.GetReference(input);
            ref byte prevRef = ref MemoryMarshal.GetReference(prev);
            ref byte dstRef = ref MemoryMarshal.GetReference(dst);
            for (i = 0; i < maxPos; i += 32)
            {
                // Add 32 bytes of the current row to the 32 bytes above them.
                Vector256<int> a0 = Unsafe.As<byte, Vector256<int>>(ref Unsafe.Add(ref inputRef, i));
                Vector256<int> b0 = Unsafe.As<byte, Vector256<int>>(ref Unsafe.Add(ref prevRef, i));
                Vector256<byte> c0 = Avx2.Add(a0.AsByte(), b0.AsByte());
                Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref dstRef, i)) = c0;
            }

            // Remaining bytes that do not fill a whole group of 32.
            for (; i < width; i++)
            {
                dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]);
            }
        }
        else
#endif
        {
            for (int i = 0; i < width; i++)
            {
                dst[i] = (byte)(prev[i] + input[i]);
            }
        }
    }
}

private static void GradientUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
if (prev == null)
if (prev.IsEmpty)
{
HorizontalUnfilter(null, input, dst, width);
}
Expand Down
1 change: 0 additions & 1 deletion src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
Expand Down
10 changes: 5 additions & 5 deletions tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ public void DecodeThenEncodeImageFromStreamShouldSucceed()
// Names of the KnownQuantizers members supplied as theory data to the tests below.
public static readonly TheoryData<string> QuantizerNames =
    new()
    {
        nameof(KnownQuantizers.Octree),
        nameof(KnownQuantizers.WebSafe),
        nameof(KnownQuantizers.Werner),
        nameof(KnownQuantizers.Wu)
    };

[Theory]
[WithFile(TestImages.Png.CalliphoraPartial, nameof(QuantizerNames), PixelTypes.Rgba32)]
Expand Down
30 changes: 30 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ public class WebpDecoderTests

// Creates a new reference decoder each time the property is read.
private static MagickReferenceDecoder ReferenceDecoder
    => new();

// Full path of the lossy test image with horizontally filtered alpha data.
private static string TestImageLossyHorizontalFilterPath
    => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.AlphaCompressedHorizontalFilter);

// Full path of the lossy test image with vertically filtered alpha data.
private static string TestImageLossyVerticalFilterPath
    => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.AlphaCompressedVerticalFilter);

// Full path of the lossy "simple filter" test image.
private static string TestImageLossySimpleFilterPath
    => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.SimpleFilter02);

// Full path of the lossy "complex filter" test image.
private static string TestImageLossyComplexFilterPath
    => Path.Combine(TestEnvironment.InputImagesDirectoryFullPath, Lossy.BikeComplexFilter);
Expand Down Expand Up @@ -365,6 +369,26 @@ public void WebpDecoder_ThrowImageFormatException_OnInvalidImages<TPixel>(TestIm
});

#if SUPPORTS_RUNTIME_INTRINSICS
// Decodes the horizontal-filter test image with the WebP decoder, saves a debug copy
// and compares the result with the reference decoder's output.
private static void RunDecodeLossyWithHorizontalFilter()
{
    var provider = TestImageProvider<Rgba32>.File(TestImageLossyHorizontalFilterPath);

    // Disposed when the method returns.
    using Image<Rgba32> image = provider.GetImage(WebpDecoder);

    image.DebugSave(provider);
    image.CompareToOriginal(provider, ReferenceDecoder);
}

// Decodes the vertical-filter test image with the WebP decoder, saves a debug copy
// and compares the result with the reference decoder's output.
private static void RunDecodeLossyWithVerticalFilter()
{
    var provider = TestImageProvider<Rgba32>.File(TestImageLossyVerticalFilterPath);

    // Disposed when the method returns.
    using Image<Rgba32> image = provider.GetImage(WebpDecoder);

    image.DebugSave(provider);
    image.CompareToOriginal(provider, ReferenceDecoder);
}

private static void RunDecodeLossyWithSimpleFilterTest()
{
var provider = TestImageProvider<Rgba32>.File(TestImageLossySimpleFilterPath);
Expand All @@ -385,6 +409,12 @@ private static void RunDecodeLossyWithComplexFilterTest()
}
}

// Runs the horizontal-filter decode check through the feature test runner with
// HwIntrinsics.DisableHWIntrinsic.
[Fact]
public void DecodeLossyWithHorizontalFilter_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunDecodeLossyWithHorizontalFilter,
        HwIntrinsics.DisableHWIntrinsic);
}

// Runs the vertical-filter decode check through the feature test runner with
// HwIntrinsics.DisableHWIntrinsic.
[Fact]
public void DecodeLossyWithVerticalFilter_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunDecodeLossyWithVerticalFilter,
        HwIntrinsics.DisableHWIntrinsic);
}

// Runs the simple-filter decode check through the feature test runner with
// HwIntrinsics.DisableHWIntrinsic.
[Fact]
public void DecodeLossyWithSimpleFilterTest_WithoutHardwareIntrinsics_Works()
{
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunDecodeLossyWithSimpleFilterTest,
        HwIntrinsics.DisableHWIntrinsic);
}

Expand Down