Skip to content

Commit

Permalink
Merge pull request #1930 from SixLabors/bp/alphadecodingsse
Browse files Browse the repository at this point in the history
SSE2 / AVX2 of webp AlphaDecoding
  • Loading branch information
JimBobSquarePants committed Jan 18, 2022
2 parents fe95bb5 + 4c4f27d commit 669c900
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 15 deletions.
82 changes: 73 additions & 9 deletions src/ImageSharp/Formats/Webp/AlphaDecoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
using SixLabors.ImageSharp.Formats.Webp.BitReader;
using SixLabors.ImageSharp.Formats.Webp.Lossless;
using SixLabors.ImageSharp.Memory;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace SixLabors.ImageSharp.Formats.Webp
{
Expand Down Expand Up @@ -307,34 +311,94 @@ private static void ColorIndexInverseTransformAlpha(

/// <summary>
/// Undoes horizontal prediction for one row: each output byte is the input byte plus the
/// previously written output byte to its left, with 8-bit wrap-around.
/// </summary>
/// <param name="prev">The previous row. Only its first byte is read, as the predictor of the first output byte; when empty, 0 is used.</param>
/// <param name="input">The row to unfilter. Must hold at least <paramref name="width"/> bytes.</param>
/// <param name="dst">Receives the unfiltered row. Must hold at least <paramref name="width"/> bytes.</param>
/// <param name="width">The number of bytes to process.</param>
private static void HorizontalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
    // Nothing to do for an empty row. Without this guard the SSE2 path would touch dst[0]
    // while the scalar path would not, so both paths are made to agree here.
    if (width <= 0)
    {
        return;
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    if (Sse2.IsSupported)
    {
        dst[0] = (byte)(input[0] + (prev.IsEmpty ? 0 : prev[0]));
        if (width <= 1)
        {
            return;
        }

        nint i;

        // Byte 0 of 'last' carries the most recently written output byte into the next group.
        Vector128<int> last = Vector128<int>.Zero.WithElement(0, dst[0]);
        ref byte srcRef = ref MemoryMarshal.GetReference(input);
        ref byte dstRef = ref MemoryMarshal.GetReference(dst);
        for (i = 1; i + 8 <= width; i += 8)
        {
            // Load 8 input bytes into the low half of the vector and add the carry to byte 0.
            var a0 = Vector128.Create(Unsafe.As<byte, long>(ref Unsafe.Add(ref srcRef, i)), 0);
            Vector128<byte> a1 = Sse2.Add(a0.AsByte(), last.AsByte());

            // Running sum in three shift-and-add steps (by 1, 2 and 4 bytes): afterwards each of
            // the low 8 bytes holds the wrapped sum of itself and all bytes before it.
            Vector128<byte> a2 = Sse2.ShiftLeftLogical128BitLane(a1, 1);
            Vector128<byte> a3 = Sse2.Add(a1, a2);
            Vector128<byte> a4 = Sse2.ShiftLeftLogical128BitLane(a3, 2);
            Vector128<byte> a5 = Sse2.Add(a3, a4);
            Vector128<byte> a6 = Sse2.ShiftLeftLogical128BitLane(a5, 4);
            Vector128<byte> a7 = Sse2.Add(a5, a6);

            // Only the low 8 bytes are results; the upper half just holds shifted-out residue.
            Unsafe.As<byte, Vector64<byte>>(ref Unsafe.Add(ref dstRef, i)) = a7.GetLower();

            // Move byte 7 (the last value just written) down to byte 0 as the next carry.
            last = Sse2.ShiftRightLogical(a7.AsInt64(), 56).AsInt32();
        }

        // Scalar tail for the remaining (fewer than 8) bytes.
        for (; i < width; ++i)
        {
            dst[(int)i] = (byte)(input[(int)i] + dst[(int)i - 1]);
        }
    }
    else
#endif
    {
        byte pred = (byte)(prev.IsEmpty ? 0 : prev[0]);

        for (int i = 0; i < width; i++)
        {
            byte val = (byte)(pred + input[i]);
            pred = val;
            dst[i] = val;
        }
    }
}

/// <summary>
/// Undoes vertical prediction for one row: each output byte is the input byte plus the byte at
/// the same position in the previous row, with 8-bit wrap-around. When there is no previous row,
/// the row is handed to <see cref="HorizontalUnfilter"/> instead.
/// </summary>
/// <param name="prev">The previous row, or an empty span when there is none.</param>
/// <param name="input">The row to unfilter. Must hold at least <paramref name="width"/> bytes.</param>
/// <param name="dst">Receives the unfiltered row. Must hold at least <paramref name="width"/> bytes.</param>
/// <param name="width">The number of bytes to process.</param>
private static void VerticalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
    if (prev.IsEmpty)
    {
        HorizontalUnfilter(null, input, dst, width);
        return;
    }

#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported)
    {
        nint i;

        // Largest multiple of 32 not exceeding width: the part handled 32 bytes at a time.
        int maxPos = width & ~31;
        ref byte inputRef = ref MemoryMarshal.GetReference(input);
        ref byte prevRef = ref MemoryMarshal.GetReference(prev);
        ref byte dstRef = ref MemoryMarshal.GetReference(dst);
        for (i = 0; i < maxPos; i += 32)
        {
            Vector256<byte> a0 = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref inputRef, i));
            Vector256<byte> b0 = Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref prevRef, i));

            // Byte-wise add, wrapping at 8 bits exactly like the scalar (byte) cast below.
            Unsafe.As<byte, Vector256<byte>>(ref Unsafe.Add(ref dstRef, i)) = Avx2.Add(a0, b0);
        }

        // Scalar tail for the remaining (fewer than 32) bytes.
        for (; i < width; i++)
        {
            dst[(int)i] = (byte)(prev[(int)i] + input[(int)i]);
        }
    }
    else
#endif
    {
        for (int i = 0; i < width; i++)
        {
            dst[i] = (byte)(prev[i] + input[i]);
        }
    }
}

private static void GradientUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
{
if (prev == null)
if (prev.IsEmpty)
{
HorizontalUnfilter(null, input, dst, width);
}
Expand Down
1 change: 0 additions & 1 deletion src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Licensed under the Apache License, Version 2.0.

using System;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Memory;
Expand Down
10 changes: 5 additions & 5 deletions tests/ImageSharp.Tests/Formats/GeneralFormatTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@ public void DecodeThenEncodeImageFromStreamShouldSucceed()
/// <summary>
/// Quantizer names, each taken from a KnownQuantizers member via nameof,
/// referenced by name from the data-driven tests in this class.
/// </summary>
public static readonly TheoryData<string> QuantizerNames =
    new()
    {
        nameof(KnownQuantizers.Octree),
        nameof(KnownQuantizers.WebSafe),
        nameof(KnownQuantizers.Werner),
        nameof(KnownQuantizers.Wu)
    };

[Theory]
[WithFile(TestImages.Png.CalliphoraPartial, nameof(QuantizerNames), PixelTypes.Rgba32)]
Expand Down
30 changes: 30 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/WebpDecoderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ public class WebpDecoderTests

// Yields a new MagickReferenceDecoder on every access.
private static MagickReferenceDecoder ReferenceDecoder
{
    get { return new MagickReferenceDecoder(); }
}

// Path of the Lossy.AlphaCompressedHorizontalFilter image under the test input images directory.
private static string TestImageLossyHorizontalFilterPath
{
    get
    {
        return Path.Combine(
            TestEnvironment.InputImagesDirectoryFullPath,
            Lossy.AlphaCompressedHorizontalFilter);
    }
}

// Path of the Lossy.AlphaCompressedVerticalFilter image under the test input images directory.
private static string TestImageLossyVerticalFilterPath
{
    get
    {
        return Path.Combine(
            TestEnvironment.InputImagesDirectoryFullPath,
            Lossy.AlphaCompressedVerticalFilter);
    }
}

// Path of the Lossy.SimpleFilter02 image under the test input images directory.
private static string TestImageLossySimpleFilterPath
{
    get
    {
        return Path.Combine(
            TestEnvironment.InputImagesDirectoryFullPath,
            Lossy.SimpleFilter02);
    }
}

// Path of the Lossy.BikeComplexFilter image under the test input images directory.
private static string TestImageLossyComplexFilterPath
{
    get
    {
        return Path.Combine(
            TestEnvironment.InputImagesDirectoryFullPath,
            Lossy.BikeComplexFilter);
    }
}
Expand Down Expand Up @@ -365,6 +369,26 @@ public void WebpDecoder_ThrowImageFormatException_OnInvalidImages<TPixel>(TestIm
});

#if SUPPORTS_RUNTIME_INTRINSICS
// Decodes the horizontal-filter test image with WebpDecoder, saves a debug copy and
// compares the result against the output of ReferenceDecoder.
private static void RunDecodeLossyWithHorizontalFilter()
{
    var provider = TestImageProvider<Rgba32>.File(TestImageLossyHorizontalFilterPath);

    // The using declaration disposes the image when the method exits.
    using Image<Rgba32> image = provider.GetImage(WebpDecoder);
    image.DebugSave(provider);
    image.CompareToOriginal(provider, ReferenceDecoder);
}

// Decodes the vertical-filter test image with WebpDecoder, saves a debug copy and
// compares the result against the output of ReferenceDecoder.
private static void RunDecodeLossyWithVerticalFilter()
{
    var provider = TestImageProvider<Rgba32>.File(TestImageLossyVerticalFilterPath);

    // The using declaration disposes the image when the method exits.
    using Image<Rgba32> image = provider.GetImage(WebpDecoder);
    image.DebugSave(provider);
    image.CompareToOriginal(provider, ReferenceDecoder);
}

private static void RunDecodeLossyWithSimpleFilterTest()
{
var provider = TestImageProvider<Rgba32>.File(TestImageLossySimpleFilterPath);
Expand All @@ -385,6 +409,12 @@ private static void RunDecodeLossyWithComplexFilterTest()
}
}

[Fact]
public void DecodeLossyWithHorizontalFilter_WithoutHardwareIntrinsics_Works()
{
    // Runs the horizontal-filter decode through the feature runner with the DisableHWIntrinsic flag.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunDecodeLossyWithHorizontalFilter,
        HwIntrinsics.DisableHWIntrinsic);
}

[Fact]
public void DecodeLossyWithVerticalFilter_WithoutHardwareIntrinsics_Works()
{
    // Runs the vertical-filter decode through the feature runner with the DisableHWIntrinsic flag.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunDecodeLossyWithVerticalFilter,
        HwIntrinsics.DisableHWIntrinsic);
}

[Fact]
public void DecodeLossyWithSimpleFilterTest_WithoutHardwareIntrinsics_Works()
{
    // Runs the simple-filter decode through the feature runner with the DisableHWIntrinsic flag.
    FeatureTestRunner.RunWithHwIntrinsicsFeature(
        RunDecodeLossyWithSimpleFilterTest,
        HwIntrinsics.DisableHWIntrinsic);
}

Expand Down

0 comments on commit 669c900

Please sign in to comment.