Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PNG: Add SSE/AVX version of Sub, Up, Average and Paeth filters #2028

Merged
merged 23 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
747422c
Add Sse2 version of Average png filter
brianpopow Feb 22, 2022
7152088
No need for the scratch buffer for 4 bytes per pixel
brianpopow Feb 22, 2022
18548f7
Dont use intrinsics for 3 bytes per pixel
brianpopow Feb 22, 2022
9eb71a2
Add average filter tests
brianpopow Feb 23, 2022
a921c57
Add benchmark file for average filter with 4bpp
brianpopow Feb 23, 2022
3ab1ba6
Use DisableHWIntrinsic in average filter test
brianpopow Feb 23, 2022
9f3c466
Merge branch 'main' into bp/pngavgsse2
brianpopow Feb 23, 2022
9f32255
Rename average filter tests
brianpopow Feb 23, 2022
4f6b807
Fix average test data
brianpopow Feb 23, 2022
c01001f
Add comment about pixel layout
brianpopow Feb 23, 2022
e1f96f2
Additional png decoder tests for average filter
brianpopow Feb 23, 2022
8716c51
Add SSE2 version of up filter
brianpopow Feb 24, 2022
0c8dbea
Add Avx version of up filter
brianpopow Feb 24, 2022
849e866
Add SSE2 version of sub filter
brianpopow Feb 24, 2022
670105f
Apply suggestions from code review
brianpopow Feb 24, 2022
04219b5
Use nint for offset
brianpopow Feb 24, 2022
8432b9f
Use nint for offset
brianpopow Feb 24, 2022
06843f2
Add SSE version of paeth filter
brianpopow Feb 24, 2022
d9fddd5
Merge branch 'bp/pngupfilter' into bp/pngavgsse2
brianpopow Feb 25, 2022
88b75da
Add tests for png filters with and without intrinsics
brianpopow Feb 25, 2022
26a742e
Additional tests for decoding png's with filter
brianpopow Feb 25, 2022
bdbb9d2
Merge branch 'main' into bp/pngavgsse2
brianpopow Feb 27, 2022
fe06e38
Merge branch 'main' into bp/pngavgsse2
JimBobSquarePants Feb 28, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 62 additions & 7 deletions src/ImageSharp/Formats/Png/Filters/AverageFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,76 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
internal static class AverageFilter
{
/// <summary>
/// Decodes the scanline
/// Decodes a scanline, which was filtered with the average filter.
/// </summary>
/// <param name="scanline">The scanline to decode</param>
/// <param name="scanline">The scanline to decode.</param>
/// <param name="previousScanline">The previous scanline.</param>
/// <param name="bytesPerPixel">The bytes per pixel.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Decode(Span<byte> scanline, Span<byte> previousScanline, int bytesPerPixel)
{
    DebugGuard.MustBeSameSized<byte>(scanline, previousScanline, nameof(scanline));

    // Neighbourhood used by the average filter:
    //   prev: c b
    //   row:  a d
    // Every byte is reconstructed from the (truncated) average of a and b:
    //   Average(x) + floor((Raw(x-bpp) + Prior(x)) / 2)
#if SUPPORTS_RUNTIME_INTRINSICS
    // The vectorized path is only taken for exactly four bytes per pixel.
    if (Sse2.IsSupported && bytesPerPixel == 4)
    {
        DecodeSse2(scanline, previousScanline);
        return;
    }
#endif

    // Every other configuration goes through the scalar implementation.
    DecodeScalar(scanline, previousScanline, bytesPerPixel);
}

#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// SSE2 implementation of the average filter decoder for four bytes per pixel.
/// The scanline is decoded in place, one 4-byte pixel per iteration.
/// </summary>
/// <param name="scanline">The scanline to decode. The byte at index 0 is skipped.</param>
/// <param name="previousScanline">The previous scanline; read at the same offsets as <paramref name="scanline"/>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void DecodeSse2(Span<byte> scanline, Span<byte> previousScanline)
{
    ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
    ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);

    // 'd' carries the pixel decoded by the previous iteration (the left neighbour 'a').
    // It starts at zero, so the first pixel is averaged against a zero left neighbour.
    Vector128<byte> d = Vector128<byte>.Zero;
    Vector128<byte> ones = Vector128.Create((byte)1);

    // Decoding starts at offset 1, so only Length - 1 bytes are pixel data
    // (index 0 is presumably the PNG filter-type byte - confirm against the caller).
    // Counting only those bytes keeps the 4-byte loads and stores below inside the span,
    // even if the length is not of the form 1 + 4 * n.
    int remaining = scanline.Length - 1;
    nint offset = 1;
    while (remaining >= 4)
    {
        ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
        Vector128<byte> a = d;
        Vector128<byte> b = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte();
        d = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref scanRef)).AsByte();

        // PNG requires a truncating average, so we can't just use _mm_avg_epu8,
        // but we can fix it up by subtracting off 1 if it rounded up.
        // The average rounded up exactly when a + b is odd, i.e. when bit 0 of (a ^ b) is set.
        Vector128<byte> avg = Sse2.Average(a, b);
        Vector128<byte> roundedUp = Sse2.And(Sse2.Xor(a, b), ones);
        avg = Sse2.Subtract(avg, roundedUp);

        // Byte-wise addition, so each channel wraps modulo 256.
        d = Sse2.Add(d, avg);

        // Store the result.
        Unsafe.As<byte, int>(ref scanRef) = Sse2.ConvertToInt32(d.AsInt32());

        remaining -= 4;
        offset += 4;
    }
}
#endif

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void DecodeScalar(Span<byte> scanline, Span<byte> previousScanline, int bytesPerPixel)
{
ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);

nint x = 1;
for (; x <= bytesPerPixel /* Note the <= because x starts at 1 */; ++x)
{
ref byte scan = ref Unsafe.Add(ref scanBaseRef, x);
Expand All @@ -52,13 +107,13 @@ public static void Decode(Span<byte> scanline, Span<byte> previousScanline, int
}

/// <summary>
/// Encodes the scanline
/// Encodes a scanline with the average filter applied.
/// </summary>
/// <param name="scanline">The scanline to encode</param>
/// <param name="scanline">The scanline to encode.</param>
/// <param name="previousScanline">The previous scanline.</param>
/// <param name="result">The filtered scanline result.</param>
/// <param name="bytesPerPixel">The bytes per pixel.</param>
/// <param name="sum">The sum of the total variance of the filtered row</param>
/// <param name="sum">The sum of the total variance of the filtered row.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previousScanline, Span<byte> result, int bytesPerPixel, out int sum)
{
Expand Down
88 changes: 84 additions & 4 deletions src/ImageSharp/Formats/Png/Filters/PaethFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,96 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
internal static class PaethFilter
{
/// <summary>
/// Decodes the scanline
/// Decodes a scanline, which was filtered with the paeth filter.
/// </summary>
/// <param name="scanline">The scanline to decode</param>
/// <param name="scanline">The scanline to decode.</param>
/// <param name="previousScanline">The previous scanline.</param>
/// <param name="bytesPerPixel">The bytes per pixel.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Decode(Span<byte> scanline, Span<byte> previousScanline, int bytesPerPixel)
{
    DebugGuard.MustBeSameSized<byte>(scanline, previousScanline, nameof(scanline));

    // The Paeth predictor estimates pixel d from its left neighbour a and
    // from two pixels of the previous row, b and c:
    //   prev: c b
    //   row:  a d
    // The prediction is whichever of a, b or c lies closest to p = a + b - c.
#if SUPPORTS_RUNTIME_INTRINSICS
    // The vectorized path is only taken for exactly four bytes per pixel.
    if (Sse41.IsSupported && bytesPerPixel == 4)
    {
        DecodeSse41(scanline, previousScanline);
        return;
    }
#endif

    // Every other configuration goes through the scalar implementation.
    DecodeScalar(scanline, previousScanline, bytesPerPixel);
}

#if SUPPORTS_RUNTIME_INTRINSICS

/// <summary>
/// SSE4.1 implementation of the paeth filter decoder for four bytes per pixel.
/// The scanline is decoded in place, one 4-byte pixel per iteration.
/// </summary>
/// <param name="scanline">The scanline to decode. The byte at index 0 is skipped.</param>
/// <param name="previousScanline">The previous scanline; read at the same offsets as <paramref name="scanline"/>.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void DecodeSse41(Span<byte> scanline, Span<byte> previousScanline)
{
    ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
    ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);

    // 'b' and 'd' carry over between iterations and become 'c' and 'a' of the next pixel.
    // Both start at zero, so the first pixel sees a = c = 0.
    Vector128<byte> b = Vector128<byte>.Zero;
    Vector128<byte> d = Vector128<byte>.Zero;

    // Decoding starts at offset 1, so only Length - 1 bytes are pixel data
    // (index 0 is presumably the PNG filter-type byte - confirm against the caller).
    // Counting only those bytes keeps the 4-byte loads and stores below inside the span,
    // even if the length is not of the form 1 + 4 * n.
    int remaining = scanline.Length - 1;
    nint offset = 1;
    while (remaining >= 4)
    {
        ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);

        // It's easiest to do this math (particularly, deal with pc) with 16-bit intermediates.
        // Interleaving with zero widens each of the four bytes to a 16-bit lane.
        Vector128<byte> c = b;
        Vector128<byte> a = d;
        b = Sse2.UnpackLow(
            Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref Unsafe.Add(ref prevBaseRef, offset))).AsByte(),
            Vector128<byte>.Zero);
        d = Sse2.UnpackLow(
            Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref scanRef)).AsByte(),
            Vector128<byte>.Zero);

        // (p-a) == (a+b-c - a) == (b-c)
        Vector128<short> pa = Sse2.Subtract(b.AsInt16(), c.AsInt16());

        // (p-b) == (a+b-c - b) == (a-c)
        Vector128<short> pb = Sse2.Subtract(a.AsInt16(), c.AsInt16());

        // (p-c) == (a+b-c - c) == (a+b-c-c) == (b-c)+(a-c)
        Vector128<short> pc = Sse2.Add(pa, pb);

        pa = Ssse3.Abs(pa).AsInt16(); /* |p-a| */
        pb = Ssse3.Abs(pb).AsInt16(); /* |p-b| */
        pc = Ssse3.Abs(pc).AsInt16(); /* |p-c| */

        Vector128<short> smallest = Sse2.Min(pc, Sse2.Min(pa, pb));

        // Paeth breaks ties favoring a over b over c:
        // first choose between b and c, then let a override wherever |p-a| is the minimum.
        Vector128<byte> bOrC = Sse41.BlendVariable(c, b, Sse2.CompareEqual(smallest, pb).AsByte());
        Vector128<byte> nearest = Sse41.BlendVariable(bOrC, a, Sse2.CompareEqual(smallest, pa).AsByte());

        // Note the byte-wise add (`_epi8`): we need the addition to wrap modulo 256.
        // The high byte of every 16-bit lane is zero in both operands and stays zero.
        d = Sse2.Add(d, nearest);

        // Store the result: pack the four 16-bit lanes back into four bytes.
        Unsafe.As<byte, int>(ref scanRef) = Sse2.ConvertToInt32(Sse2.PackUnsignedSaturate(d.AsInt16(), d.AsInt16()).AsInt32());

        remaining -= 4;
        offset += 4;
    }
}

#endif

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void DecodeScalar(Span<byte> scanline, Span<byte> previousScanline, int bytesPerPixel)
{
ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);
ref byte prevBaseRef = ref MemoryMarshal.GetReference(previousScanline);

Expand All @@ -56,13 +136,13 @@ public static void Decode(Span<byte> scanline, Span<byte> previousScanline, int
}

/// <summary>
/// Encodes the scanline
/// Encodes a scanline and applies the paeth filter.
/// </summary>
/// <param name="scanline">The scanline to encode</param>
/// <param name="previousScanline">The previous scanline.</param>
/// <param name="result">The filtered scanline result.</param>
/// <param name="bytesPerPixel">The bytes per pixel.</param>
/// <param name="sum">The sum of the total variance of the filtered row</param>
/// <param name="sum">The sum of the total variance of the filtered row.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> previousScanline, Span<byte> result, int bytesPerPixel, out int sum)
{
Expand Down
52 changes: 46 additions & 6 deletions src/ImageSharp/Formats/Png/Filters/SubFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,57 @@ namespace SixLabors.ImageSharp.Formats.Png.Filters
internal static class SubFilter
{
/// <summary>
/// Decodes the scanline
/// Decodes a scanline, which was filtered with the sub filter.
/// </summary>
/// <param name="scanline">The scanline to decode</param>
/// <param name="scanline">The scanline to decode.</param>
/// <param name="bytesPerPixel">The bytes per pixel.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Decode(Span<byte> scanline, int bytesPerPixel)
{
    // With the Sub filter, every pixel is predicted by the pixel before it.
#if SUPPORTS_RUNTIME_INTRINSICS
    // The vectorized path is only taken for exactly four bytes per pixel.
    if (Sse2.IsSupported && bytesPerPixel == 4)
    {
        DecodeSse2(scanline);
        return;
    }
#endif

    // Every other configuration goes through the scalar implementation.
    DecodeScalar(scanline, bytesPerPixel);
}

#if SUPPORTS_RUNTIME_INTRINSICS
/// <summary>
/// SSE2 implementation of the sub filter decoder for four bytes per pixel.
/// The scanline is decoded in place, one 4-byte pixel per iteration.
/// </summary>
/// <param name="scanline">The scanline to decode. The byte at index 0 is skipped.</param>
private static void DecodeSse2(Span<byte> scanline)
{
    ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);

    // 'd' carries the pixel decoded by the previous iteration.
    // It starts at zero, so the first pixel is left unchanged.
    Vector128<byte> d = Vector128<byte>.Zero;

    // Decoding starts at offset 1, so only Length - 1 bytes are pixel data
    // (index 0 is presumably the PNG filter-type byte - confirm against the caller).
    // Counting only those bytes keeps the 4-byte loads and stores below inside the span,
    // even if the length is not of the form 1 + 4 * n.
    int remaining = scanline.Length - 1;
    nint offset = 1;
    while (remaining >= 4)
    {
        ref byte scanRef = ref Unsafe.Add(ref scanBaseRef, offset);
        Vector128<byte> a = d;
        d = Sse2.ConvertScalarToVector128Int32(Unsafe.As<byte, int>(ref scanRef)).AsByte();

        // Sub(x) + Raw(x-bpp), added byte-wise so each channel wraps modulo 256.
        d = Sse2.Add(d, a);

        // Store the result.
        Unsafe.As<byte, int>(ref scanRef) = Sse2.ConvertToInt32(d.AsInt32());

        remaining -= 4;
        offset += 4;
    }
}
#endif

private static void DecodeScalar(Span<byte> scanline, int bytesPerPixel)
{
ref byte scanBaseRef = ref MemoryMarshal.GetReference(scanline);

// Sub(x) + Raw(x-bpp)
int x = bytesPerPixel + 1;
nint x = bytesPerPixel + 1;
Unsafe.Add(ref scanBaseRef, x);
for (; x < scanline.Length; ++x)
{
Expand All @@ -42,12 +82,12 @@ public static void Decode(Span<byte> scanline, int bytesPerPixel)
}

/// <summary>
/// Encodes the scanline
/// Encodes a scanline with the sub filter applied.
/// </summary>
/// <param name="scanline">The scanline to encode</param>
/// <param name="scanline">The scanline to encode.</param>
/// <param name="result">The filtered scanline result.</param>
/// <param name="bytesPerPixel">The bytes per pixel.</param>
/// <param name="sum">The sum of the total variance of the filtered row</param>
/// <param name="sum">The sum of the total variance of the filtered row.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Encode(ReadOnlySpan<byte> scanline, ReadOnlySpan<byte> result, int bytesPerPixel, out int sum)
{
Expand Down
Loading