Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVX conversions for Luminance and Rgb #2039

Merged
merged 4 commits into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
// Licensed under the Apache License, Version 2.0.

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.PixelFormats;

Expand Down Expand Up @@ -74,6 +79,44 @@ public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
ref Block8x8F yBlock = ref this.Y;
ref L8 l8Start = ref MemoryMarshal.GetReference(this.l8Span);

if (RgbToYCbCrConverterVectorized.IsSupported)
{
ConvertAvx(ref l8Start, ref yBlock);
}
else
{
ConvertScalar(ref l8Start, ref yBlock);
}
}

/// <summary>
/// Converts 8x8 L8 pixel matrix to 8x8 Block of floats using Avx2 Intrinsics.
/// </summary>
/// <remarks>
/// Each iteration reads exactly one row (8 source bytes), so the method never touches
/// memory beyond the 64 bytes starting at <paramref name="l8Start"/>.
/// </remarks>
/// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
private static void ConvertAvx(ref L8 l8Start, ref Block8x8F yBlock)
{
    Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    // The 8-byte row stride below relies on L8 being a single byte per pixel.
    ref byte l8Bytes = ref Unsafe.As<L8, byte>(ref l8Start);
    ref Vector256<float> destRef = ref yBlock.V0;

    const int bytesPerL8Stride = 8;
    for (nint i = 0; i < 8; i++)
    {
        // Load only the 8 bytes of the current row. Reading a full Vector128<byte> here
        // would fetch 16 bytes, which for the last row (offset 56) runs 8 bytes past
        // the end of a 64 byte buffer.
        long rowBits = Unsafe.ReadUnaligned<long>(ref Unsafe.AddByteOffset(ref l8Bytes, bytesPerL8Stride * i));
        Vector128<byte> row = Vector128.CreateScalarUnsafe(rowBits).AsByte();

        // Zero-extend the low 8 bytes to 8 x int32, then convert them to 8 x float.
        Unsafe.Add(ref destRef, i) = Avx.ConvertToVector256Single(Avx2.ConvertToVector256Int32(row));
    }
#endif
}

/// <summary>
/// Converts 8x8 L8 pixel matrix to 8x8 Block of floats.
/// </summary>
/// <param name="l8Start">Start of span of L8 pixels with size of 64</param>
/// <param name="yBlock">8x8 destination matrix of Luminance(Y) converted data</param>
private static void ConvertScalar(ref L8 l8Start, ref Block8x8F yBlock)
{
for (int i = 0; i < Block8x8F.Size; i++)
{
ref L8 c = ref Unsafe.Add(ref l8Start, i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
// Licensed under the Apache License, Version 2.0.

using System;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if SUPPORTS_RUNTIME_INTRINSICS
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.PixelFormats;

Expand Down Expand Up @@ -94,10 +99,56 @@ public void Convert(int x, int y, ref RowOctet<TPixel> currentRows)
ref Block8x8F greenBlock = ref this.G;
ref Block8x8F blueBlock = ref this.B;

CopyToBlock(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
if (RgbToYCbCrConverterVectorized.IsSupported)
{
ConvertAvx(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
}
else
{
ConvertScalar(this.rgbSpan, ref redBlock, ref greenBlock, ref blueBlock);
}
}

/// <summary>
/// Splits an 8x8 matrix of packed Rgb24 pixels into three 8x8 float blocks, one per
/// color channel, using Avx2 intrinsics. One row of 8 pixels is processed per iteration.
/// </summary>
/// <remarks>
/// Every iteration loads a full 32 byte vector starting at the row's 24 byte offset.
/// NOTE(review): for the last row this spans 8 bytes beyond 64 tightly packed pixels;
/// presumably the caller's buffer is padded accordingly - confirm.
/// </remarks>
/// <param name="rgbSpan">Span of Rgb24 pixels with size of 64</param>
/// <param name="rBlock">8x8 destination matrix of Red converted data</param>
/// <param name="gBlock">8x8 destination matrix of Green converted data</param>
/// <param name="bBlock">8x8 destination matrix of Blue converted data</param>
private static void ConvertAvx(Span<Rgb24> rgbSpan, ref Block8x8F rBlock, ref Block8x8F gBlock, ref Block8x8F bBlock)
{
    Debug.Assert(RgbToYCbCrConverterVectorized.IsSupported, "AVX2 is required to run this converter");

#if SUPPORTS_RUNTIME_INTRINSICS
    ref Vector256<byte> sourceRef = ref Unsafe.As<Rgb24, Vector256<byte>>(ref MemoryMarshal.GetReference(rgbSpan));
    ref Vector256<float> rRows = ref rBlock.V0;
    ref Vector256<float> gRows = ref gBlock.V0;
    ref Vector256<float> bRows = ref bBlock.V0;

    Vector256<byte> zero = Vector256<byte>.Zero;
    Vector256<uint> laneMask = Unsafe.As<byte, Vector256<uint>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.MoveFirst24BytesToSeparateLanes));
    Vector256<byte> channelMask = Unsafe.As<byte, Vector256<byte>>(ref MemoryMarshal.GetReference(RgbToYCbCrConverterVectorized.ExtractRgb));

    const int bytesPerRgbStride = 24;
    for (nint row = 0; row < 8; row++)
    {
        // Distribute the 24 pixel bytes of this row over the two 128 bit lanes.
        Vector256<uint> packed = Unsafe.AddByteOffset(ref sourceRef, bytesPerRgbStride * row).AsUInt32();
        Vector256<byte> perLane = Avx2.PermuteVar8x32(packed, laneMask).AsByte();

        // Within each lane, gather the bytes of each channel into contiguous groups.
        Vector256<byte> grouped = Avx2.Shuffle(perLane, channelMask);

        // First widening step: interleave with zero bytes.
        Vector256<byte> redGreen = Avx2.UnpackLow(grouped, zero);
        Vector256<byte> blueRest = Avx2.UnpackHigh(grouped, zero);

        // Second widening step yields 32 bit integers, which are then converted to floats.
        Vector256<int> red = Avx2.UnpackLow(redGreen, zero).AsInt32();
        Vector256<int> green = Avx2.UnpackHigh(redGreen, zero).AsInt32();
        Vector256<int> blue = Avx2.UnpackLow(blueRest, zero).AsInt32();

        Unsafe.Add(ref rRows, row) = Avx.ConvertToVector256Single(red);
        Unsafe.Add(ref gRows, row) = Avx.ConvertToVector256Single(green);
        Unsafe.Add(ref bRows, row) = Avx.ConvertToVector256Single(blue);
    }
#endif
}

private static void CopyToBlock(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
private static void ConvertScalar(Span<Rgb24> rgbSpan, ref Block8x8F redBlock, ref Block8x8F greenBlock, ref Block8x8F blueBlock)
{
ref Rgb24 rgbStart = ref MemoryMarshal.GetReference(rgbSpan);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,13 @@ public static int AvxCompatibilityPadding

#if SUPPORTS_RUNTIME_INTRINSICS

private static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
internal static ReadOnlySpan<byte> MoveFirst24BytesToSeparateLanes => new byte[]
{
0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 6, 0, 0, 0,
3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 7, 0, 0, 0
};

private static ReadOnlySpan<byte> ExtractRgb => new byte[]
internal static ReadOnlySpan<byte> ExtractRgb => new byte[]
{
0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF,
0, 3, 6, 9, 1, 4, 7, 10, 2, 5, 8, 11, 0xFF, 0xFF, 0xFF, 0xFF
Expand Down
67 changes: 47 additions & 20 deletions tests/ImageSharp.Benchmarks/Codecs/Jpeg/EncodeJpeg.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ public class EncodeJpeg

// ImageSharp
private Image<Rgba32> bmpCore;
private Image<L8> bmpLuminance;
private JpegEncoder encoder400;
private JpegEncoder encoder420;
private JpegEncoder encoder444;
private JpegEncoder encoderRgb;

private MemoryStream destinationStream;

Expand All @@ -40,8 +43,11 @@ public void ReadImages()

this.bmpCore = Image.Load<Rgba32>(this.bmpStream);
this.bmpCore.Metadata.ExifProfile = null;
this.bmpLuminance = this.bmpCore.CloneAs<L8>();
this.encoder400 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.Luminance };
this.encoder420 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.YCbCrRatio420 };
this.encoder444 = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.YCbCrRatio444 };
this.encoderRgb = new JpegEncoder { Quality = this.Quality, ColorType = JpegColorType.Rgb };

this.bmpStream.Position = 0;
this.bmpDrawing = SDImage.FromStream(this.bmpStream);
Expand Down Expand Up @@ -79,6 +85,14 @@ public void JpegSystemDrawing()
this.destinationStream.Seek(0, SeekOrigin.Begin);
}

/// <summary>
/// Benchmarks saving the L8 image as a jpeg with the luminance-only encoder.
/// </summary>
[Benchmark(Description = "ImageSharp (greyscale) Jpeg 4:0:0")]
public void JpegCore400()
{
    MemoryStream target = this.destinationStream;
    this.bmpLuminance.SaveAsJpeg(target, this.encoder400);

    // Rewind the stream once the image has been written.
    _ = target.Seek(0, SeekOrigin.Begin);
}


[Benchmark(Description = "ImageSharp Jpeg 4:2:0")]
public void JpegCore420()
{
Expand All @@ -93,6 +107,13 @@ public void JpegCore444()
this.destinationStream.Seek(0, SeekOrigin.Begin);
}

/// <summary>
/// Benchmarks saving the Rgba32 image as a jpeg with the Rgb color type encoder.
/// </summary>
[Benchmark(Description = "ImageSharp Jpeg rgb")]
public void JpegRgb()
{
    MemoryStream target = this.destinationStream;
    this.bmpCore.SaveAsJpeg(target, this.encoderRgb);

    // Rewind the stream once the image has been written.
    _ = target.Seek(0, SeekOrigin.Begin);
}

// https://docs.microsoft.com/en-us/dotnet/api/system.drawing.imaging.encoderparameter?redirectedfrom=MSDN&view=net-5.0
private static ImageCodecInfo GetEncoder(ImageFormat format)
{
Expand All @@ -111,24 +132,30 @@ private static ImageCodecInfo GetEncoder(ImageFormat format)
}

/*
BenchmarkDotNet=v0.13.0, OS=Windows 10.0.19042
Intel Core i7-6700K CPU 4.00GHz (Skylake), 1 CPU, 8 logical and 4 physical cores
.NET SDK=6.0.100-preview.3.21202.5
[Host] : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT
DefaultJob : .NET Core 3.1.18 (CoreCLR 4.700.21.35901, CoreFX 4.700.21.36305), X64 RyuJIT


| Method | Quality | Mean | Error | StdDev | Ratio |
|---------------------------- |-------- |---------:|---------:|---------:|------:|
| 'System.Drawing Jpeg 4:2:0' | 75 | 30.04 ms | 0.540 ms | 0.479 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 19.32 ms | 0.290 ms | 0.257 ms | 0.64 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 26.76 ms | 0.332 ms | 0.294 ms | 0.89 |
| | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 90 | 32.82 ms | 0.184 ms | 0.163 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 25.00 ms | 0.408 ms | 0.361 ms | 0.76 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 31.83 ms | 0.636 ms | 0.595 ms | 0.97 |
| | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 100 | 39.30 ms | 0.359 ms | 0.318 ms | 1.00 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 34.49 ms | 0.265 ms | 0.235 ms | 0.88 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 56.40 ms | 0.565 ms | 0.501 ms | 1.44 |
BenchmarkDotNet=v0.13.0, OS=linuxmint 20.3
AMD Ryzen 7 5800X, 1 CPU, 16 logical and 8 physical cores
.NET SDK=6.0.200
[Host] : .NET Core 3.1.22 (CoreCLR 4.700.21.56803, CoreFX 4.700.21.57101), X64 RyuJIT
DefaultJob : .NET Core 3.1.22 (CoreCLR 4.700.21.56803, CoreFX 4.700.21.57101), X64 RyuJIT


| Method | Quality | Mean | Error | StdDev | Ratio | RatioSD |
|------------------------------------ |-------- |----------:|----------:|----------:|------:|--------:|
| 'System.Drawing Jpeg 4:2:0' | 75 | 9.157 ms | 0.0138 ms | 0.0123 ms | 1.00 | 0.00 |
| 'ImageSharp (greyscale) Jpeg 4:0:0' | 75 | 12.142 ms | 0.1321 ms | 0.1236 ms | 1.33 | 0.01 |
| 'ImageSharp Jpeg 4:2:0' | 75 | 19.655 ms | 0.1057 ms | 0.0883 ms | 2.15 | 0.01 |
| 'ImageSharp Jpeg 4:4:4' | 75 | 19.157 ms | 0.2852 ms | 0.2668 ms | 2.09 | 0.03 |
| 'ImageSharp Jpeg rgb' | 75 | 26.404 ms | 0.3803 ms | 0.3557 ms | 2.89 | 0.04 |
| | | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 90 | 10.828 ms | 0.0727 ms | 0.0680 ms | 1.00 | 0.00 |
| 'ImageSharp (greyscale) Jpeg 4:0:0' | 90 | 14.918 ms | 0.1089 ms | 0.1019 ms | 1.38 | 0.01 |
| 'ImageSharp Jpeg 4:2:0' | 90 | 23.718 ms | 0.0301 ms | 0.0267 ms | 2.19 | 0.02 |
| 'ImageSharp Jpeg 4:4:4' | 90 | 23.857 ms | 0.2387 ms | 0.2233 ms | 2.20 | 0.03 |
| 'ImageSharp Jpeg rgb' | 90 | 34.700 ms | 0.2207 ms | 0.2064 ms | 3.20 | 0.03 |
| | | | | | | |
| 'System.Drawing Jpeg 4:2:0' | 100 | 13.478 ms | 0.0054 ms | 0.0048 ms | 1.00 | 0.00 |
| 'ImageSharp (greyscale) Jpeg 4:0:0' | 100 | 19.446 ms | 0.0803 ms | 0.0751 ms | 1.44 | 0.01 |
| 'ImageSharp Jpeg 4:2:0' | 100 | 30.339 ms | 0.4578 ms | 0.4282 ms | 2.25 | 0.03 |
| 'ImageSharp Jpeg 4:4:4' | 100 | 39.056 ms | 0.1779 ms | 0.1664 ms | 2.90 | 0.01 |
| 'ImageSharp Jpeg rgb' | 100 | 51.828 ms | 0.3336 ms | 0.3121 ms | 3.85 | 0.02 |
*/