Skip to content

Commit

Permalink
Merge pull request #1805 from SixLabors/bp/clampedaddsubtractsse
Browse files Browse the repository at this point in the history
Add SSE2 versions of ClampedAddSubtractFull and ClampedAddSubtractHalf
  • Loading branch information
brianpopow committed Nov 8, 2021
2 parents 94b9962 + 9fa7ac2 commit d8d2616
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 25 deletions.
8 changes: 8 additions & 0 deletions src/ImageSharp/Formats/Webp/Lossless/ColorCache.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Formats.Webp.Lossless
{
/// <summary>
Expand Down Expand Up @@ -41,6 +43,7 @@ public void Init(int hashBits)
/// Inserts a new color into the cache.
/// </summary>
/// <param name="bgra">The color to insert.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void Insert(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
Expand All @@ -52,13 +55,15 @@ public void Insert(uint bgra)
/// </summary>
/// <param name="key">The key to lookup.</param>
/// <returns>The color for the key.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public uint Lookup(int key) => this.Colors[key];

/// <summary>
/// Returns the index of the given color.
/// </summary>
/// <param name="bgra">The color to check.</param>
/// <returns>The index of the color in the cache or -1 if its not present.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public int Contains(uint bgra)
{
int key = HashPix(bgra, this.HashShift);
Expand All @@ -70,15 +75,18 @@ public int Contains(uint bgra)
/// </summary>
/// <param name="bgra">The color.</param>
/// <returns>The index for the color.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
public int GetIndex(uint bgra) => HashPix(bgra, this.HashShift);

/// <summary>
/// Adds a new color to the cache.
/// </summary>
/// <param name="key">The key.</param>
/// <param name="bgra">The color to add.</param>
[MethodImpl(InliningOptions.ShortMethod)]
public void Set(uint key, uint bgra) => this.Colors[key] = bgra;

[MethodImpl(InliningOptions.ShortMethod)]
public static int HashPix(uint argb, int shift) => (int)((argb * HashMul) >> shift);
}
}
84 changes: 59 additions & 25 deletions src/ImageSharp/Formats/Webp/Lossless/LosslessUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -765,6 +765,7 @@ public static byte TransformColorBlue(sbyte greenToBlue, sbyte redToBlue, uint a
/// <summary>
/// Fast calculation of log2(v) for integer input.
/// </summary>
[MethodImpl(InliningOptions.ShortMethod)]
public static float FastLog2(uint v) => v < LogLookupIdxMax ? WebpLookupTables.Log2Table[v] : FastLog2Slow(v);

/// <summary>
Expand Down Expand Up @@ -793,7 +794,7 @@ public static void ColorCodeToMultipliers(uint colorCode, ref Vp8LMultipliers m)

private static float FastSLog2Slow(uint v)
{
Guard.MustBeGreaterThanOrEqualTo(v, LogLookupIdxMax, nameof(v));
DebugGuard.MustBeGreaterThanOrEqualTo<uint>(v, LogLookupIdxMax, nameof(v));
if (v < ApproxLogWithCorrectionMax)
{
int logCnt = 0;
Expand Down Expand Up @@ -1214,30 +1215,65 @@ public static uint AddPixels(uint a, uint b)

private static uint ClampedAddSubtractFull(uint c0, uint c1, uint c2)
{
int a = AddSubtractComponentFull(
(int)(c0 >> 24),
(int)(c1 >> 24),
(int)(c2 >> 24));
int r = AddSubtractComponentFull(
(int)((c0 >> 16) & 0xff),
(int)((c1 >> 16) & 0xff),
(int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentFull(
(int)((c0 >> 8) & 0xff),
(int)((c1 >> 8) & 0xff),
(int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
Vector128<byte> c2Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
Vector128<short> v1 = Sse2.Add(c0Vec.AsInt16(), c1Vec.AsInt16());
Vector128<short> v2 = Sse2.Subtract(v1, c2Vec.AsInt16());
Vector128<byte> b = Sse2.PackUnsignedSaturate(v2, v2);
uint output = Sse2.ConvertToUInt32(b.AsUInt32());
return output;
}
#endif
{
int a = AddSubtractComponentFull(
(int)(c0 >> 24),
(int)(c1 >> 24),
(int)(c2 >> 24));
int r = AddSubtractComponentFull(
(int)((c0 >> 16) & 0xff),
(int)((c1 >> 16) & 0xff),
(int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentFull(
(int)((c0 >> 8) & 0xff),
(int)((c1 >> 8) & 0xff),
(int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentFull((int)(c0 & 0xff), (int)(c1 & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
}
}

private static uint ClampedAddSubtractHalf(uint c0, uint c1, uint c2)
{
uint ave = Average2(c0, c1);
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
#if SUPPORTS_RUNTIME_INTRINSICS
if (Sse2.IsSupported)
{
Vector128<byte> c0Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c0).AsByte(), Vector128<byte>.Zero);
Vector128<byte> c1Vec = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c1).AsByte(), Vector128<byte>.Zero);
Vector128<byte> b0 = Sse2.UnpackLow(Sse2.ConvertScalarToVector128UInt32(c2).AsByte(), Vector128<byte>.Zero);
Vector128<short> avg = Sse2.Add(c1Vec.AsInt16(), c0Vec.AsInt16());
Vector128<short> a0 = Sse2.ShiftRightLogical(avg, 1);
Vector128<short> a1 = Sse2.Subtract(a0, b0.AsInt16());
Vector128<short> bgta = Sse2.CompareGreaterThan(b0.AsInt16(), a0.AsInt16());
Vector128<short> a2 = Sse2.Subtract(a1, bgta);
Vector128<short> a3 = Sse2.ShiftRightArithmetic(a2, 1);
Vector128<short> a4 = Sse2.Add(a0, a3).AsInt16();
Vector128<byte> a5 = Sse2.PackUnsignedSaturate(a4, a4);
uint output = Sse2.ConvertToUInt32(a5.AsUInt32());
return output;
}
#endif
{
uint ave = Average2(c0, c1);
int a = AddSubtractComponentHalf((int)(ave >> 24), (int)(c2 >> 24));
int r = AddSubtractComponentHalf((int)((ave >> 16) & 0xff), (int)((c2 >> 16) & 0xff));
int g = AddSubtractComponentHalf((int)((ave >> 8) & 0xff), (int)((c2 >> 8) & 0xff));
int b = AddSubtractComponentHalf((int)(ave & 0xff), (int)(c2 & 0xff));
return ((uint)a << 24) | ((uint)r << 16) | ((uint)g << 8) | (uint)b;
}
}

[MethodImpl(InliningOptions.ShortMethod)]
Expand Down Expand Up @@ -1275,11 +1311,9 @@ private static uint Select(uint a, uint b, uint c, Span<short> scratch)
Vector128<byte> pb = Sse2.UnpackLow(bc, Vector128<byte>.Zero); // |b - c|
Vector128<ushort> diff = Sse2.Subtract(pb.AsUInt16(), pa.AsUInt16());
Sse2.Store((ushort*)p, diff);
int paMinusPb = output[3] + output[2] + output[1] + output[0];
return (paMinusPb <= 0) ? a : b;
}

int paMinusPb = output[0] + output[1] + output[2] + output[3];

return (paMinusPb <= 0) ? a : b;
}
else
#endif
Expand Down
58 changes: 58 additions & 0 deletions tests/ImageSharp.Tests/Formats/WebP/LosslessUtilsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,55 @@ private static void RunPredictor11Test()
}
}

private static void RunPredictor12Test()
{
// arrange
uint[] topData = { 4294844413, 4294779388 };
uint left = 4294844413;
uint expectedResult = 4294779388;

// act
unsafe
{
fixed (uint* top = &topData[1])
{
uint actual = LosslessUtils.Predictor12(left, top);

// assert
Assert.Equal(expectedResult, actual);
}
}
}

private static void RunPredictor13Test()
{
// arrange
uint[] topData = { 4278193922, 4278193666 };
uint left = 4278193410;
uint expectedResult = 4278193154;

// act
unsafe
{
fixed (uint* top = &topData[1])
{
uint actual = LosslessUtils.Predictor13(left, top);

// assert
Assert.Equal(expectedResult, actual);
}
}
}

[Fact]
public void Predictor11_Works() => RunPredictor11Test();

[Fact]
public void Predictor12_Works() => RunPredictor12Test();

[Fact]
public void Predictor13_Works() => RunPredictor13Test();

[Fact]
public void SubtractGreen_Works() => RunSubtractGreenTest();

Expand All @@ -175,6 +221,18 @@ private static void RunPredictor11Test()
[Fact]
public void Predictor11_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor11Test, HwIntrinsics.DisableSSE2);

[Fact]
public void Predictor12_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.AllowAll);

[Fact]
public void Predictor12_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor12Test, HwIntrinsics.DisableSSE2);

[Fact]
public void Predictor13_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.AllowAll);

[Fact]
public void Predictor13_WithoutSSE2_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunPredictor13Test, HwIntrinsics.DisableSSE2);

[Fact]
public void SubtractGreen_WithHardwareIntrinsics_Works() => FeatureTestRunner.RunWithHwIntrinsicsFeature(RunSubtractGreenTest, HwIntrinsics.AllowAll);

Expand Down

0 comments on commit d8d2616

Please sign in to comment.