diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
index 8596d85558..02bbc38fcf 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.
+using System.Runtime.CompilerServices;
+
namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
///
@@ -41,6 +43,7 @@ public void Init(int hashBits)
/// Inserts a new color into the cache.
///
/// The color to insert.
+ [MethodImpl(InliningOptions.ShortMethod)]
public void Insert(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
@@ -52,6 +55,7 @@ public void Insert(uint bgra)
///
/// The key to lookup.
/// The color for the key.
+ [MethodImpl(InliningOptions.ShortMethod)]
public uint Lookup(int key) => this.Colors[key];
///
@@ -59,6 +63,7 @@ public void Insert(uint bgra)
///
/// The color to check.
/// The index of the color in the cache or -1 if its not present.
+ [MethodImpl(InliningOptions.ShortMethod)]
public int Contains(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
@@ -70,6 +75,7 @@ public int Contains(uint bgra)
///
/// The color.
/// The index for the color.
+ [MethodImpl(InliningOptions.ShortMethod)]
public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);
///
@@ -77,8 +83,10 @@ public int Contains(uint bgra)
///
/// The key.
/// The color to add.
+ [MethodImpl(InliningOptions.ShortMethod)]
public void Set(uint key, uint bgra) => this.Colors[key] = bgra;
+ [MethodImpl(InliningOptions.ShortMethod)]
public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
}
}
diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
index 8231464070..f9b97c6c44 100644
--- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
+++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
@@ -765,6 +765,7 @@ public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint a
///
/// Fast calculation of log2(v) for integer input.
///
+ [MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);
///
@@ -793,7 +794,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m)
private static float FastSLog2Slow(uint v)
{
- Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
+ DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
@@ -1214,30 +1215,65 @@ public static uint AddPixels(uint a, uint b)
private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
- int a = AddSubtractComponentFull(
- (int)(c0 >> 24),
- (int)(c1 >> 24),
- (int)(c2 >> 24));
- int r = AddSubtractComponentFull(
- (int)((c0 >> 16) & 0xff),
- (int)((c1 >> 16) & 0xff),
- (int)((c2 >> 16) & 0xff));
- int g = AddSubtractComponentFull(
- (int)((c0 >> 8) & 0xff),
- (int)((c1 >> 8) & 0xff),
- (int)((c2 >> 8) & 0xff));
- int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
- return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported) // SSE2 path: handles all four 8-bit channels at once; when unsupported, control falls through to the scalar block below
+ {
+ Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); // interleave the bytes of c0 with zero bytes, so each 8-bit channel is widened to a 16-bit lane
+ Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); // same widening for c1
+ Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); // same widening for c2
+ Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); // per-channel c0 + c1; at most 510, which fits a 16-bit lane
+ Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); // per-channel c0 + c1 - c2; may be negative or above 255 here
+ Vector128 b = Sse2.PackUnsignedSaturate(v2, v2); // narrow the 16-bit lanes back to bytes with unsigned saturation, clamping each channel to [0, 255]
+ uint output = Sse2.ConvertToUInt32(b.AsUInt32()); // the low 32 bits hold the four packed channels
+ return output;
+ }
+#endif
+ { // scalar path: every 8-bit channel is shifted/masked out and handed to AddSubtractComponentFull
+ int a = AddSubtractComponentFull(
+ (int)(c0 >> 24),
+ (int)(c1 >> 24),
+ (int)(c2 >> 24));
+ int r = AddSubtractComponentFull(
+ (int)((c0 >> 16) & 0xff),
+ (int)((c1 >> 16) & 0xff),
+ (int)((c2 >> 16) & 0xff));
+ int g = AddSubtractComponentFull(
+ (int)((c0 >> 8) & 0xff),
+ (int)((c1 >> 8) & 0xff),
+ (int)((c2 >> 8) & 0xff));
+ int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
+ return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; // reassemble the four channel results into one 32-bit pixel
+ }
}
private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{
- uint ave = Average2(c0, c1);
- int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
- int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
- int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
- int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
- return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
+#if SUPPORTS_RUNTIME_INTRINSICS
+ if (Sse2.IsSupported) // SSE2 path: handles all four 8-bit channels at once; when unsupported, control falls through to the scalar block below
+ {
+ Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); // interleave the bytes of c0 with zero bytes, so each 8-bit channel is widened to a 16-bit lane
+ Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); // same widening for c1
+ Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); // same widening for c2
+ Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16()); // per-channel c0 + c1 (not halved yet)
+ Vector128 a0 = Sse2.ShiftRightLogical(avg, 1); // halve the sum: per-channel average of c0 and c1, rounded down
+ Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); // per-channel average - c2; negative where c2 is larger
+ Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); // lanes where c2 > average become all ones (-1), the rest 0
+ Vector128 a2 = Sse2.Subtract(a1, bgta); // subtracting -1 adds 1 to exactly the negative differences, so the arithmetic shift below rounds toward zero instead of down
+ Vector128 a3 = Sse2.ShiftRightArithmetic(a2, 1); // (average - c2) / 2, sign preserved
+ Vector128 a4 = Sse2.Add(a0, a3).AsInt16(); // average + (average - c2) / 2
+ Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); // narrow the 16-bit lanes back to bytes with unsigned saturation, clamping each channel to [0, 255]
+ uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); // the low 32 bits hold the four packed channels
+ return output;
+ }
+#endif
+ { // scalar path: average c0 and c1 with Average2, then hand each 8-bit channel of the average and of c2 to AddSubtractComponentHalf
+ uint ave = Average2(c0, c1);
+ int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
+ int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
+ int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
+ int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
+ return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; // reassemble the four channel results into one 32-bit pixel
+ }
}
[MethodImpl(InliningOptions.ShortMethod)]
@@ -1275,11 +1311,9 @@ private static uint Select(uint a, uint b, uint c, Span scratch)
Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c|
Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
Sse2.Store((ushort*)p, diff);
+ int paMinusPb = output[3] + output[2] + output[1] + output[0];
+ return (paMinusPb <= 0) ? a : b;
}
-
- int paMinusPb = output[0] + output[1] + output[2] + output[3];
-
- return (paMinusPb <= 0) ? a : b;
}
else
#endif
diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
index bf381ebdaa..c70f332ef6 100644
--- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
+++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
@@ -153,9 +153,55 @@ private static void RunPredictor11Test()
}
}
+ private static void RunPredictor12Test()
+ {
+ // arrange: in hex the row is { 0xFFFE1FFD, 0xFFFD21FC }; left repeats the first entry and the expectation repeats the second.
+ const uint left = 4294844413;
+ const uint expectedResult = 4294779388;
+ uint[] topData = { 4294844413, 4294779388 };
+
+ // act: the pointer handed over is &topData[1], so topData[0] stays addressable as top[-1].
+ unsafe
+ {
+ fixed (uint* topStart = topData)
+ {
+ uint actual = LosslessUtils.Predictor12(left, topStart + 1);
+
+ // assert
+ Assert.Equal(expectedResult, actual);
+ }
+ }
+ }
+
+ private static void RunPredictor13Test()
+ {
+ // arrange: in hex the row is { 0xFF000F02, 0xFF000E02 }, left is 0xFF000D02 and the expectation is 0xFF000C02.
+ const uint left = 4278193410;
+ const uint expectedResult = 4278193154;
+ uint[] topData = { 4278193922, 4278193666 };
+
+ // act: the pointer handed over is &topData[1], so topData[0] stays addressable as top[-1].
+ unsafe
+ {
+ fixed (uint* topStart = topData)
+ {
+ uint actual = LosslessUtils.Predictor13(left, topStart + 1);
+
+ // assert
+ Assert.Equal(expectedResult, actual);
+ }
+ }
+ }
+
[Fact]
public void Predictor11_Works() => RunPredictor11Test();
+ [Fact]
+ public void Predictor12_Works() => RunPredictor12Test();
+
+ [Fact]
+ public void Predictor13_Works() => RunPredictor13Test();
+
[Fact]
public void SubtractGreen_Works() => RunSubtractGreenTest();
@@ -175,6 +221,18 @@ private static void RunPredictor11Test()
[Fact]
public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);
+ [Fact]
+ public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll);
+
+ [Fact]
+ public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2);
+
+ [Fact]
+ public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll);
+
+ [Fact]
+ public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2);
+
[Fact]
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);