diff --git a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs index 8596d85558..02bbc38fcf 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs @@ -1,6 +1,8 @@ // Copyright (c) Six Labors. // Licensed under the Apache License, Version 2.0. +using System.Runtime.CompilerServices; + namespace SixLabors.ImageSharp.Formats.Webp.Lossless { /// @@ -41,6 +43,7 @@ public void Init(int hashBits) /// Inserts a new color into the cache. /// /// The color to insert. + [MethodImpl(InliningOptions.ShortMethod)] public void Insert(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -52,6 +55,7 @@ public void Insert(uint bgra) /// /// The key to lookup. /// The color for the key. + [MethodImpl(InliningOptions.ShortMethod)] public uint Lookup(int key) => this.Colors[key]; /// @@ -59,6 +63,7 @@ public void Insert(uint bgra) /// /// The color to check. /// The index of the color in the cache or -1 if its not present. + [MethodImpl(InliningOptions.ShortMethod)] public int Contains(uint bgra) { int key = HashPix(bgra, this.HashShift); @@ -70,6 +75,7 @@ public int Contains(uint bgra) /// /// The color. /// The index for the color. + [MethodImpl(InliningOptions.ShortMethod)] public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift); /// @@ -77,8 +83,10 @@ public int Contains(uint bgra) /// /// The key. /// The color to add. + [MethodImpl(InliningOptions.ShortMethod)] public void Set(uint key, uint bgra) => this.Colors[key] = bgra; + [MethodImpl(InliningOptions.ShortMethod)] public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift); } } diff --git a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs index 8231464070..f9b97c6c44 100644 --- a/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs +++ b/src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs @@ -765,6 +765,7 @@ public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint a /// /// Fast calculation of log2(v) for integer input. /// + [MethodImpl(InliningOptions.ShortMethod)] public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v); /// @@ -793,7 +794,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m) private static float FastSLog2Slow(uint v) { - Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); + DebugGuard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v)); if (v < ApproxLogWithCorrectionMax) { int logCnt = 0; @@ -1214,30 +1215,65 @@ public static uint AddPixels(uint a, uint b) private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2) { - int a = AddSubtractComponentFull( - (int)(c0 >> 24), - (int)(c1 >> 24), - (int)(c2 >> 24)); - int r = AddSubtractComponentFull( - (int)((c0 >> 16) & 0xff), - (int)((c1 >> 16) & 0xff), - (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentFull( - (int)((c0 >> 8) & 0xff), - (int)((c1 >> 8) & 0xff), - (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16()); + Vector128 v2 = Sse2.Subtract(v1, c2Vec.AsInt16()); + Vector128 b = Sse2.PackUnsignedSaturate(v2, v2); + uint output = Sse2.ConvertToUInt32(b.AsUInt32()); + return output; + } +#endif + { + int a = AddSubtractComponentFull( + (int)(c0 >> 24), + (int)(c1 >> 24), + (int)(c2 >> 24)); + int r = AddSubtractComponentFull( + (int)((c0 >> 16) & 0xff), + (int)((c1 >> 16) & 0xff), + (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentFull( + (int)((c0 >> 8) & 0xff), + (int)((c1 >> 8) & 0xff), + (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2) { - uint ave = Average2(c0, c1); - int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); - int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); - int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); - int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); - return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; +#if SUPPORTS_RUNTIME_INTRINSICS + if (Sse2.IsSupported) + { + Vector128 c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128.Zero); + Vector128 c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128.Zero); + Vector128 b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128.Zero); + Vector128 avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16()); + Vector128 a0 = Sse2.ShiftRightLogical(avg, 1); + Vector128 a1 = Sse2.Subtract(a0, b0.AsInt16()); + Vector128 bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16()); + Vector128 a2 = Sse2.Subtract(a1, bgta); + Vector128 a3 = Sse2.ShiftRightArithmetic(a2, 1); + Vector128 a4 = Sse2.Add(a0, a3).AsInt16(); + Vector128 a5 = Sse2.PackUnsignedSaturate(a4, a4); + uint output = Sse2.ConvertToUInt32(a5.AsUInt32()); + return output; + } +#endif + { + uint ave = Average2(c0, c1); + int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24)); + int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff)); + int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff)); + int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff)); + return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b; + } } [MethodImpl(InliningOptions.ShortMethod)] @@ -1275,11 +1311,9 @@ private static uint Select(uint a, uint b, uint c, Span scratch) Vector128 pb = Sse2.UnpackLow(bc, Vector128.Zero); // |b - c| Vector128 diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16()); Sse2.Store((ushort*)p, diff); + int paMinusPb = output[3] + output[2] + output[1] + output[0]; + return (paMinusPb <= 0) ? a : b; } - - int paMinusPb = output[0] + output[1] + output[2] + output[3]; - - return (paMinusPb <= 0) ? a : b; } else #endif diff --git a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs index bf381ebdaa..c70f332ef6 100644 --- a/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs +++ b/tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs @@ -153,9 +153,55 @@ private static void RunPredictor11Test() } } + private static void RunPredictor12Test() + { + // arrange + uint[] topData = { 4294844413, 4294779388 }; + uint left = 4294844413; + uint expectedResult = 4294779388; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor12(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + + private static void RunPredictor13Test() + { + // arrange + uint[] topData = { 4278193922, 4278193666 }; + uint left = 4278193410; + uint expectedResult = 4278193154; + + // act + unsafe + { + fixed (uint* top = &topData[1]) + { + uint actual = LosslessUtils.Predictor13(left, top); + + // assert + Assert.Equal(expectedResult, actual); + } + } + } + [Fact] public void Predictor11_Works() => RunPredictor11Test(); + [Fact] + public void Predictor12_Works() => RunPredictor12Test(); + + [Fact] + public void Predictor13_Works() => RunPredictor13Test(); + [Fact] public void SubtractGreen_Works() => RunSubtractGreenTest(); @@ -175,6 +221,18 @@ private static void RunPredictor11Test() [Fact] public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2); + [Fact] + public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2); + + [Fact] + public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll); + + [Fact] + public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2); + [Fact] public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);