Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SSE2 versions of ClampedAddSubtractFull and ClampedAddSubtractHalf #1805

Merged
merged 12 commits into from
Nov 8, 2021
8 changes: 8 additions & 0 deletions src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
/// <summary>
Expand Down Expand Up @@ -41,6 +43,7 @@ public void Init(int hashBits)
/// Inserts a new color into the cache.
/// </summary>
/// <param name="bgra">The color to insert.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void Insert(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
Expand All @@ -52,13 +55,15 @@ public void Insert(uint bgra)
/// </summary>
/// <param name="key">The key to lookup.</param>
/// <returns>The color for the key.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public uint Lookup(int key) => this.Colors[key];

/// <summary>
/// Returns the index of the given color.
/// </summary>
/// <param name="bgra">The color to check.</param>
/// <returns>The index of the color in the cache or -1 if its not present.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public int Contains(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
Expand All @@ -70,15 +75,18 @@ public int Contains(uint bgra)
/// </summary>
/// <param name="bgra">The color.</param>
/// <returns>The index for the color.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);

/// <summary>
/// Adds a new color to the cache.
/// </summary>
/// <param name="key">The key.</param>
/// <param name="bgra">The color to add.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void Set(uint key, uint bgra) => this.Colors[key] = bgra;

[MethodImpl(InliningOptions.ShortMethod)]
public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
}
}
84 changes: 59 additions & 25 deletions src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,7 @@ public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint a
/// <summary>
/// Fast calculation of log2(v) for integer input.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);

/// <summary>
Expand Down Expand Up @@ -793,7 +794,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m)

private static float FastSLog2Slow(uint v)
{
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
Expand Down Expand Up @@ -1214,30 +1215,65 @@ public static uint AddPixels(uint a, uint b)

private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
int a = AddSubtractComponentFull(
(int)(c0 >> 24),
(int)(c1 >> 24),
(int)(c2 >> 24));
int r = AddSubtractComponentFull(
(int)((c0 >> 16) & 0xff),
(int)((c1 >> 16) & 0xff),
(int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentFull(
(int)((c0 >> 8) & 0xff),
(int)((c1 >> 8) & 0xff),
(int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
Vector128<byte> c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
Vector128<short> v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16());
Vector128<short> v2 = Sse2.Subtract(v1, c2Vec.AsInt16());
Vector128<byte> b = Sse2.PackUnsignedSaturate(v2, v2);
uint output = Sse2.ConvertToUInt32(b.AsUInt32());
return output;
}
#endif
{
int a = AddSubtractComponentFull(
(int)(c0 >> 24),
(int)(c1 >> 24),
(int)(c2 >> 24));
int r = AddSubtractComponentFull(
(int)((c0 >> 16) & 0xff),
(int)((c1 >> 16) & 0xff),
(int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentFull(
(int)((c0 >> 8) & 0xff),
(int)((c1 >> 8) & 0xff),
(int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
}
}

private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{
uint ave = Average2(c0, c1);
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
Vector128<byte> b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
Vector128<short> avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
Vector128<short> a0 = Sse2.ShiftRightLogical(avg, 1);
Vector128<short> a1 = Sse2.Subtract(a0, b0.AsInt16());
Vector128<short> bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
Vector128<short> a2 = Sse2.Subtract(a1, bgta);
Vector128<short> a3 = Sse2.ShiftRightArithmetic(a2, 1);
Vector128<short> a4 = Sse2.Add(a0, a3).AsInt16();
Vector128<byte> a5 = Sse2.PackUnsignedSaturate(a4, a4);
uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
return output;
}
#endif
{
uint ave = Average2(c0, c1);
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
}
}

[MethodImpl(InliningOptions.ShortMethod)]
Expand Down Expand Up @@ -1275,11 +1311,9 @@ private static uint Select(uint a, uint b, uint c, Span<short> scratch)
Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
Sse2.Store((ushort*)p, diff);
int paMinusPb = output[3] + output[2] + output[1] + output[0];
return (paMinusPb <= 0) ? a : b;
}

int paMinusPb = output[0] + output[1] + output[2] + output[3];

return (paMinusPb <= 0) ? a : b;
}
else
#endif
Expand Down
58 changes: 58 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,55 @@ private static void RunPredictor11Test()
}
}

private static void RunPredictor12Test()
{
// arrange
uint[] topData = { 4294844413, 4294779388 };
uint left = 4294844413;
uint expectedResult = 4294779388;

// act
unsafe
{
fixed (uint* top = &topData[1])
{
uint actual = LosslessUtils.Predictor12(left, top);

// assert
Assert.Equal(expectedResult, actual);
}
}
}

private static void RunPredictor13Test()
{
// arrange
uint[] topData = { 4278193922, 4278193666 };
uint left = 4278193410;
uint expectedResult = 4278193154;

// act
unsafe
{
fixed (uint* top = &topData[1])
{
uint actual = LosslessUtils.Predictor13(left, top);

// assert
Assert.Equal(expectedResult, actual);
}
}
}

[Fact]
public void Predictor11_Works() => RunPredictor11Test();

[Fact]
public void Predictor12_Works() => RunPredictor12Test();

[Fact]
public void Predictor13_Works() => RunPredictor13Test();

[Fact]
public void SubtractGreen_Works() => RunSubtractGreenTest();

Expand All @@ -175,6 +221,18 @@ private static void RunPredictor11Test()
[Fact]
public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);

[Fact]
public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll);

[Fact]
public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2);

[Fact]
public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll);

[Fact]
public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2);

[Fact]
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);

Expand Down