Skip to content

Commit

Permalink
Tar: support GNU numeric format.
Browse files Browse the repository at this point in the history
The tar specification stores numeric fields using an octal representation.
This limits the range of values that can be stored.

To increase the supported range, a GNU extension defines that when
the leading byte is 0xff/0x80 the remaining bytes are a
negative/positive big endian formatted value.

When writing under the PAX format, we continue to use only the
octal representation in the header fields. Values that do not fit
are overridden using extended attributes.
  • Loading branch information
tmds committed Apr 17, 2024
1 parent 91ee24a commit 8fc115a
Show file tree
Hide file tree
Showing 17 changed files with 412 additions and 342 deletions.
3 changes: 0 additions & 3 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,6 @@
<data name="TarEntryFieldExceedsMaxLength" xml:space="preserve">
<value>The field '{0}' exceeds the maximum allowed length for this format.</value>
</data>
<data name="TarSizeFieldTooLargeForEntryFormat" xml:space="preserve">
<value>The value of the size field for the current entry of format '{0}' is greater than the format allows.</value>
</data>
<data name="TarExtAttrDisallowedKeyChar" xml:space="preserve">
<value>The extended attribute key '{0}' contains a disallowed '{1}' character.</value>
</data>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ public DateTimeOffset AccessTime
get => _header._aTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
_header._aTime = value;
}
}
Expand All @@ -112,7 +111,6 @@ public DateTimeOffset ChangeTime
get => _header._cTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
_header._cTime = value;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ public int DeviceMajor
}

ArgumentOutOfRangeException.ThrowIfNegative(value);
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal

_header._devMajor = value;
}
Expand All @@ -85,7 +84,6 @@ public int DeviceMinor
}

ArgumentOutOfRangeException.ThrowIfNegative(value);
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal

_header._devMinor = value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ public DateTimeOffset ModificationTime
get => _header._mTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
_header._mTime = value;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
return null;
}

long size = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
Debug.Assert(size <= TarHelpers.MaxSizeLength, "size exceeded the max value possible with 11 octal digits. Actual size " + size);
long size = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
if (size < 0)
{
throw new InvalidDataException(SR.Format(SR.TarSizeFieldNegative));
Expand All @@ -384,14 +383,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
// Continue with the rest of the fields that require no special checks
TarHeader header = new(initialFormat,
name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
mode: (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
mode: TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag])
{
_checksum = checksum,
_size = size,
_uid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
_uid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
_linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
};

Expand Down Expand Up @@ -524,10 +523,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice)
{
// Major number for a character device or block device entry.
_devMajor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
_devMajor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));

// Minor number for a character device or block device entry.
_devMinor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
_devMinor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
}
}

Expand All @@ -536,10 +535,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
private void ReadGnuAttributes(Span<byte> buffer)
{
// Convert byte arrays
long aTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
long aTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
_aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime);

long cTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
long cTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
_cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime);

// TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Buffers.Binary;
using System.Buffers.Text;
using System.Collections.Generic;
using System.Diagnostics;
Expand All @@ -15,6 +16,9 @@ namespace System.Formats.Tar
// Writes header attributes of a tar archive entry.
internal sealed partial class TarHeader
{
private const long Octal12ByteFieldMaxValue = (1L << (3 * 11)) - 1; // Max value of 11 octal digits.
private const int Octal8ByteFieldMaxValue = (1 << (3 * 7)) - 1; // Max value of 7 octal digits.

private static ReadOnlySpan<byte> UstarMagicBytes => "ustar\0"u8;
private static ReadOnlySpan<byte> UstarVersionBytes => "00"u8;

Expand Down Expand Up @@ -606,35 +610,22 @@ private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)

if (_mode > 0)
{
checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
checksum += FormatNumeric(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
}

if (_uid > 0)
{
checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
checksum += FormatNumeric(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
}

if (_gid > 0)
{
checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
checksum += FormatNumeric(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
}

if (_size > 0)
{
if (_size <= TarHelpers.MaxSizeLength)
{
checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
}
else if (_format is not TarEntryFormat.Pax)
{
throw new ArgumentException(SR.Format(SR.TarSizeFieldTooLargeForEntryFormat, _format));
}
else
{
// No writing, just verifications
Debug.Assert(_typeFlag is not TarEntryType.ExtendedAttributes and not TarEntryType.GlobalExtendedAttributes);
Debug.Assert(Convert.ToInt64(ExtendedAttributes[PaxEaSize]) > TarHelpers.MaxSizeLength);
}
checksum += FormatNumeric(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
}

checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime));
Expand Down Expand Up @@ -739,12 +730,12 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)

if (_devMajor > 0)
{
checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
checksum += FormatNumeric(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
}

if (_devMinor > 0)
{
checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
checksum += FormatNumeric(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
}

return checksum;
Expand Down Expand Up @@ -916,7 +907,7 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
ExtendedAttributes[PaxEaLinkName] = _linkName;
}

if (_size > TarHelpers.MaxSizeLength)
if (_size > Octal12ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaSize] = _size.ToString();
}
Expand All @@ -925,6 +916,42 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
ExtendedAttributes.Remove(PaxEaSize);
}

if (_uid > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaUid] = _uid.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaUid);
}

if (_gid > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaGid] = _gid.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaGid);
}

if (_devMajor > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaDevMajor] = _devMajor.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaDevMajor);
}

if (_devMinor > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaDevMinor] = _devMinor.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaDevMinor);
}

// Sets the specified string to the dictionary if it's longer than the specified max byte length; otherwise, remove it.
static void TryAddStringField(Dictionary<string, string> extendedAttributes, string key, string? value, int maxLength)
{
Expand Down Expand Up @@ -1022,6 +1049,60 @@ private static int Checksum(ReadOnlySpan<byte> bytes)
return checksum;
}

// Writes an int into an 8 byte numeric header field and returns the checksum of the written bytes.
// Values in the 7-digit octal range, and every value when the entry format is PAX, go through FormatOctal.
// Everything else is stored with the GNU binary extension: the value as a 64-bit big endian integer
// whose leading byte is '0xff' for negative numbers and '0x80' for positive numbers.
private int FormatNumeric(int value, Span<byte> destination)
{
    Debug.Assert(destination.Length == 8, "8 byte field expected.");

    bool fitsInOctal = value >= 0 && value <= Octal8ByteFieldMaxValue;
    if (fitsInOctal || _format is TarEntryFormat.Pax)
    {
        return FormatOctal(value, destination);
    }

    // Widening to long sign-extends: a negative value already carries '0xff' in its leading byte.
    // Setting the top bit leaves negative values untouched and turns the leading byte of a
    // positive value into '0x80'.
    long encoded = value;
    encoded |= 1L << 63;

    BinaryPrimitives.WriteInt64BigEndian(destination, encoded);
    return Checksum(destination);
}

// Writes a long into a 12 byte numeric header field and returns the checksum of the written bytes.
// Values in the 11-digit octal range, and every value when the entry format is PAX, go through FormatOctal.
// Everything else is stored with the GNU binary extension: a 4 byte prefix followed by the value
// as a 64-bit big endian integer.
private int FormatNumeric(long value, Span<byte> destination)
{
    Debug.Assert(destination.Length == 12, "12 byte field expected.");
    const int Offset = 4; // 4 bytes before the long.

    bool fitsInOctal = value is >= 0 and <= Octal12ByteFieldMaxValue;
    if (fitsInOctal || _format is TarEntryFormat.Pax)
    {
        return FormatOctal(value, destination);
    }

    Span<byte> prefix = destination.Slice(0, Offset);
    if (value < 0)
    {
        // GNU format: negative numbers get a prefix of '0xff' bytes.
        prefix.Fill(0xff);
    }
    else
    {
        // GNU format: positive numbers get a leading '0x80' byte followed by zeros.
        prefix.Clear();
        prefix[0] = 0x80;
    }

    BinaryPrimitives.WriteInt64BigEndian(destination.Slice(Offset), value);
    return Checksum(destination);
}

// Writes the specified decimal number as a right-aligned octal number and returns its checksum.
private static int FormatOctal(long value, Span<byte> destination)
{
Expand All @@ -1040,11 +1121,11 @@ private static int FormatOctal(long value, Span<byte> destination)
return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination);
}

// Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum.
private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
// Writes the specified DateTimeOffset's Unix time seconds, and returns its checksum.
private int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
{
long unixTimeSeconds = timestamp.ToUnixTimeSeconds();
return FormatOctal(unixTimeSeconds, destination);
return FormatNumeric(unixTimeSeconds, destination);
}

// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ internal static partial class TarHelpers
{
internal const short RecordSize = 512;
internal const int MaxBufferLength = 4096;
internal const long MaxSizeLength = (1L << 33) - 1; // Max value of 11 octal digits = 2^33 - 1 or 8 Gb.

internal const UnixFileMode ValidUnixFileModes =
UnixFileMode.UserRead |
Expand Down Expand Up @@ -215,6 +214,28 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
return entryType;
}

/// <summary>Parses a numeric header field stored either as octal text or in the GNU binary format.</summary>
/// <param name="buffer">The bytes of the field. The first byte selects the representation.</param>
/// <returns>The parsed value.</returns>
internal static T ParseNumeric<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>, IBinaryInteger<T>
{
    // The tar standard stores numeric fields as octal text, which limits the range of values.
    // A GNU extension widens that range: a leading '0xff' byte marks a negative big endian value,
    // and a leading '0x80' byte marks a positive big endian value.
    return buffer[0] switch
    {
        // Negative: the whole field, marker byte included, is read as a signed big endian value.
        0xff => T.ReadBigEndian(buffer, isUnsigned: false),

        // Positive: skip the marker byte and read the remaining bytes as an unsigned big endian value.
        0x80 => T.ReadBigEndian(buffer.Slice(1), isUnsigned: true),

        // Anything else is treated as the standard octal representation.
        _ => ParseOctal<T>(buffer),
    };
}

/// <summary>Parses a byte span that represents an ASCII string containing a number in octal base.</summary>
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
{
Expand Down
4 changes: 1 addition & 3 deletions src/libraries/System.Formats.Tar/tests/Manual/ManualTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,8 @@ public static IEnumerable<object[]> WriteEntry_LongFileSize_TheoryData()
foreach (TarEntryFormat entryFormat in new[] { TarEntryFormat.V7, TarEntryFormat.Ustar, TarEntryFormat.Gnu, TarEntryFormat.Pax })
{
yield return new object[] { entryFormat, LegacyMaxFileSize, unseekableStream };
yield return new object[] { entryFormat, LegacyMaxFileSize + 1, unseekableStream };
}

// Pax supports unlimited size files.
yield return new object[] { TarEntryFormat.Pax, LegacyMaxFileSize + 1, unseekableStream };
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,6 @@ public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCa
[InlineData("invalid-go17")] // Many octal fields are all zero chars
[InlineData("issue11169")] // Checksum with null in the middle
[InlineData("issue10968")] // Garbage chars
[InlineData("writer-big")] // The size field contains an euro char
public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName)
{
await using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", testCaseName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,6 @@ public void AllowSpacesInOctalFields(string folderName, string testCaseName)
[InlineData("invalid-go17")] // Many octal fields are all zero chars
[InlineData("issue11169")] // Checksum with null in the middle
[InlineData("issue10968")] // Garbage chars
[InlineData("writer-big")] // The size field contains an euro char
public void Throw_ArchivesWithRandomChars(string testCaseName)
{
using MemoryStream archiveStream = GetTarMemoryStream(CompressionMethod.Uncompressed, "golang_tar", testCaseName);
Expand Down
2 changes: 0 additions & 2 deletions src/libraries/System.Formats.Tar/tests/TarTestsBase.Gnu.cs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,10 @@ protected void SetGnuProperties(GnuTarEntry entry)

// ATime: Verify the default value was approximately "now"
Assert.True(entry.AccessTime > approxNow);
Assert.Throws<ArgumentOutOfRangeException>(() => entry.AccessTime = DateTimeOffset.MinValue);
entry.AccessTime = TestAccessTime;

// CTime: Verify the default value was approximately "now"
Assert.True(entry.ChangeTime > approxNow);
Assert.Throws<ArgumentOutOfRangeException>(() => entry.ChangeTime = DateTimeOffset.MinValue);
entry.ChangeTime = TestChangeTime;
}

Expand Down
Loading

0 comments on commit 8fc115a

Please sign in to comment.