Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issues with envelope deserialization #1965

Merged
merged 6 commits into from
Oct 6, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
- Ignore null Context values ([#1942](https://github.com/getsentry/sentry-dotnet/pull/1942))
- Tags should not differ based on current culture ([#1949](https://github.com/getsentry/sentry-dotnet/pull/1949))
- Always recalculate payload length ([#1957](https://github.com/getsentry/sentry-dotnet/pull/1957))
- Fix issues with envelope deserialization ([#1965](https://github.com/getsentry/sentry-dotnet/pull/1965))

## 3.21.0

Expand Down
70 changes: 56 additions & 14 deletions src/Sentry/Internal/Extensions/StreamExtensions.cs
Original file line number Diff line number Diff line change
@@ -1,23 +1,66 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;

namespace Sentry.Internal.Extensions
{
internal static class StreamExtensions
{
public static async IAsyncEnumerable<byte> ReadAllBytesAsync(
public static async Task<byte[]> ReadLineAsync(
this Stream stream,
[EnumeratorCancellation] CancellationToken cancellationToken = default)
CancellationToken cancellationToken = default)
{
// This approach avoids reading one byte at a time.

const int size = 128;
using var buffer = new PooledBuffer<byte>(size);

using var result = new MemoryStream(capacity: size);

var overreach = 0;
var found = false;
while (!found)
{
var bytesRead = await stream.ReadAsync(buffer.Array, 0, size, cancellationToken).ConfigureAwait(false);
if (bytesRead <= 0)
{
break;
}

for (var i = 0; i < bytesRead; i++)
{
if (buffer.Array[i] != '\n')
{
continue;
}

found = true;
overreach = bytesRead - i - 1;
bytesRead = i;
break;
}

result.Write(buffer.Array, 0, bytesRead);
}

stream.Position -= overreach;
return result.ToArray();
}

public static async Task SkipNewlinesAsync(this Stream stream, CancellationToken cancellationToken = default)
{
// We probably have very few newline characters to skip, so reading one byte at a time is fine here.

using var buffer = new PooledBuffer<byte>(1);

while (await stream.ReadAsync(buffer.Array, 0, 1, cancellationToken).ConfigureAwait(false) > 0)
{
yield return buffer.Array[0];
if (buffer.Array[0] != '\n')
{
stream.Position--;
return;
}
}
}

Expand All @@ -38,16 +81,15 @@ public static async Task<byte[]> ReadByteChunkAsync(
return result;
}

public static async Task WriteByteAsync(
this Stream stream,
byte value,
CancellationToken cancellationToken = default)
{
using var buffer = new PooledBuffer<byte>(1);
buffer.Array[0] = value;
// pre-creating this buffer leads to an optimized path when writing
private static readonly byte[] NewlineBuffer = {(byte)'\n'};

await stream.WriteAsync(buffer.Array, 0, 1, cancellationToken).ConfigureAwait(false);
}
public static async Task WriteNewlineAsync(this Stream stream, CancellationToken cancellationToken = default) =>
#pragma warning disable CA1835 // the byte-array implementation of WriteAsync is more direct than using ReadOnlyMemory<byte>
await stream.WriteAsync(NewlineBuffer, 0, 1, cancellationToken).ConfigureAwait(false);
mattjohnsonpint marked this conversation as resolved.
Show resolved Hide resolved
#pragma warning restore CA1835

public static void WriteNewline(this Stream stream) => stream.Write(NewlineBuffer, 0, 1);

public static long? TryGetLength(this Stream stream)
{
Expand Down
25 changes: 6 additions & 19 deletions src/Sentry/Protocol/Envelopes/Envelope.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,13 @@ internal async Task SerializeAsync(
{
// Header
await SerializeHeaderAsync(stream, logger, clock, cancellationToken).ConfigureAwait(false);
await stream.WriteByteAsync((byte)'\n', cancellationToken).ConfigureAwait(false);
await stream.WriteNewlineAsync(cancellationToken).ConfigureAwait(false);

// Items
foreach (var item in Items)
{
await item.SerializeAsync(stream, logger, cancellationToken).ConfigureAwait(false);
await stream.WriteByteAsync((byte)'\n', cancellationToken).ConfigureAwait(false);
await stream.WriteNewlineAsync(cancellationToken).ConfigureAwait(false);
}
}

Expand All @@ -115,13 +115,13 @@ internal void Serialize(Stream stream, IDiagnosticLogger? logger, ISystemClock c
{
// Header
SerializeHeader(stream, logger, clock);
stream.WriteByte((byte)'\n');
stream.WriteNewline();

// Items
foreach (var item in Items)
{
item.Serialize(stream, logger);
stream.WriteByte((byte)'\n');
stream.WriteNewline();
}
}

Expand Down Expand Up @@ -277,23 +277,10 @@ internal static Envelope FromClientReport(ClientReport clientReport)
Stream stream,
CancellationToken cancellationToken = default)
{
var buffer = new List<byte>();

var prevByte = default(int);
await foreach (var curByte in stream.ReadAllBytesAsync(cancellationToken).ConfigureAwait(false))
{
// Break if found an unescaped newline
if (curByte == '\n' && prevByte != '\\')
{
break;
}

buffer.Add(curByte);
prevByte = curByte;
}
var buffer = await stream.ReadLineAsync(cancellationToken).ConfigureAwait(false);

var header =
Json.Parse(buffer.ToArray(), JsonExtensions.GetDictionaryOrNull)
Json.Parse(buffer, JsonExtensions.GetDictionaryOrNull)
?? throw new InvalidOperationException("Envelope header is malformed.");

// The sent_at header should not be included in the result
Expand Down
49 changes: 10 additions & 39 deletions src/Sentry/Protocol/Envelopes/EnvelopeItem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -82,15 +82,6 @@ public EnvelopeItem(IReadOnlyDictionary<string, object?> header, ISerializable p
/// <returns>The file name or null.</returns>
public string? TryGetFileName() => Header.GetValueOrDefault(FileNameKey) as string;

private async Task<MemoryStream> BufferPayloadAsync(IDiagnosticLogger? logger, CancellationToken cancellationToken)
{
var buffer = new MemoryStream();
await Payload.SerializeAsync(buffer, logger, cancellationToken).ConfigureAwait(false);
buffer.Seek(0, SeekOrigin.Begin);

return buffer;
}

private MemoryStream BufferPayload(IDiagnosticLogger? logger)
{
var buffer = new MemoryStream();
Expand All @@ -107,11 +98,7 @@ private static async Task SerializeHeaderAsync(
CancellationToken cancellationToken)
{
var writer = new Utf8JsonWriter(stream);
#if NET461 || NETSTANDARD2_0
using (writer)
#else
await using (writer.ConfigureAwait(false))
#endif
{
writer.WriteDictionaryValue(header, logger);
await writer.FlushAsync(cancellationToken).ConfigureAwait(false);
Expand All @@ -136,18 +123,22 @@ public async Task SerializeAsync(Stream stream, IDiagnosticLogger? logger,
// in item headers. Don't trust any previously calculated value to be correct.
// See https://github.com/getsentry/sentry-dotnet/issues/1956

var payloadBuffer = await BufferPayloadAsync(logger, cancellationToken).ConfigureAwait(false);
// NOTE: Previously we used BufferPayloadAsync, but since we buffer from in-memory objects to a MemoryStream
// there's no advantage to doing so asynchronously. We will get better perf from a synchronous approach.
var payloadBuffer = BufferPayload(logger);
#if NET461 || NETSTANDARD2_0
using (payloadBuffer)
#else
await using (payloadBuffer.ConfigureAwait(false))
#endif
{
// Write to the outbound stream asynchronously. It's likely either an HttpRequestStream or a FileStream.

// Header
var headerWithLength = Header.ToDictionary();
headerWithLength[LengthKey] = payloadBuffer.Length;
await SerializeHeaderAsync(stream, headerWithLength, logger, cancellationToken).ConfigureAwait(false);
await stream.WriteByteAsync((byte)'\n', cancellationToken).ConfigureAwait(false);
await stream.WriteNewlineAsync( cancellationToken).ConfigureAwait(false);

// Payload
await payloadBuffer.CopyToAsync(stream, cancellationToken).ConfigureAwait(false);
Expand All @@ -167,7 +158,7 @@ public void Serialize(Stream stream, IDiagnosticLogger? logger)
var headerWithLength = Header.ToDictionary();
headerWithLength[LengthKey] = payloadBuffer.Length;
SerializeHeader(stream, headerWithLength, logger);
stream.WriteByte((byte)'\n');
stream.WriteNewline();

// Payload
payloadBuffer.CopyTo(stream);
Expand Down Expand Up @@ -277,23 +268,10 @@ internal static EnvelopeItem FromClientReport(ClientReport report)
Stream stream,
CancellationToken cancellationToken = default)
{
var buffer = new List<byte>();

var prevByte = default(int);
await foreach (var curByte in stream.ReadAllBytesAsync(cancellationToken).ConfigureAwait(false))
{
// Break if found an unescaped newline
if (curByte == '\n' && prevByte != '\\')
mattjohnsonpint marked this conversation as resolved.
Show resolved Hide resolved
{
break;
}

buffer.Add(curByte);
prevByte = curByte;
}
var buffer = await stream.ReadLineAsync(cancellationToken).ConfigureAwait(false);

return
Json.Parse(buffer.ToArray(), JsonExtensions.GetDictionaryOrNull)
Json.Parse(buffer, JsonExtensions.GetDictionaryOrNull)
?? throw new InvalidOperationException("Envelope item header is malformed.");
}

Expand Down Expand Up @@ -386,14 +364,7 @@ public static async Task<EnvelopeItem> DeserializeAsync(
var payload = await DeserializePayloadAsync(stream, header, cancellationToken).ConfigureAwait(false);

// Swallow trailing newlines (some envelopes may have them after payloads)
await foreach (var curByte in stream.ReadAllBytesAsync(cancellationToken).ConfigureAwait(false))
{
if (curByte != '\n')
{
stream.Position--;
break;
}
}
await stream.SkipNewlinesAsync(cancellationToken).ConfigureAwait(false);

// Always remove the length header on deserialization so it will get re-calculated if later serialized.
// We cannot trust the length to be identical when round-tripped.
Expand Down