Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tar: support GNU numeric format. #101172

Merged
merged 8 commits into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/libraries/System.Formats.Tar/src/Resources/Strings.resx
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,8 @@
<data name="TarEntryFieldExceedsMaxLength" xml:space="preserve">
<value>The field '{0}' exceeds the maximum allowed length for this format.</value>
</data>
<data name="TarSizeFieldTooLargeForEntryFormat" xml:space="preserve">
<value>The value of the size field for the current entry of format '{0}' is greater than the format allows.</value>
<data name="TarFieldTooLargeForEntryFormat" xml:space="preserve">
<value>The value of the field for the current entry of format '{0}' is greater than the format allows.</value>
</data>
<data name="TarExtAttrDisallowedKeyChar" xml:space="preserve">
<value>The extended attribute key '{0}' contains a disallowed '{1}' character.</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ public DateTimeOffset AccessTime
get => _header._aTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
_header._aTime = value;
}
}
Expand All @@ -112,7 +111,6 @@ public DateTimeOffset ChangeTime
get => _header._cTime;
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
tmds marked this conversation as resolved.
Show resolved Hide resolved
_header._cTime = value;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ internal PosixTarEntry(TarEntry other, TarEntryFormat format)
/// </summary>
/// <remarks>Character and block devices are Unix-specific entry types.</remarks>
/// <exception cref="InvalidOperationException">The entry does not represent a block device or a character device.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151 when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
public int DeviceMajor
{
get => _header._devMajor;
Expand All @@ -62,7 +62,10 @@ public int DeviceMajor
}
tmds marked this conversation as resolved.
Show resolved Hide resolved

ArgumentOutOfRangeException.ThrowIfNegative(value);
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
if (FormatIsOctalOnly)
tmds marked this conversation as resolved.
Show resolved Hide resolved
{
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
}

_header._devMajor = value;
}
Expand All @@ -73,7 +76,7 @@ public int DeviceMajor
/// </summary>
/// <remarks>Character and block devices are Unix-specific entry types.</remarks>
/// <exception cref="InvalidOperationException">The entry does not represent a block device or a character device.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151.</exception>
/// <exception cref="ArgumentOutOfRangeException">The value is negative, or larger than 2097151 when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
public int DeviceMinor
{
get => _header._devMinor;
Expand All @@ -85,7 +88,10 @@ public int DeviceMinor
}

ArgumentOutOfRangeException.ThrowIfNegative(value);
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
if (FormatIsOctalOnly)
{
ArgumentOutOfRangeException.ThrowIfGreaterThan(value, 0x1FFFFF); // 7777777 in octal
}

_header._devMinor = value;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ public abstract partial class TarEntry
// Used to access the data section of this entry in an unseekable file
private TarReader? _readerOfOrigin;

// These formats have a limited numeric range due to the octal number representation.
protected bool FormatIsOctalOnly => _header._format is TarEntryFormat.V7 or TarEntryFormat.Ustar;

// Constructor called when reading a TarEntry from a TarReader.
internal TarEntry(TarHeader header, TarReader readerOfOrigin, TarEntryFormat format)
{
Expand Down Expand Up @@ -92,13 +95,16 @@ public int Gid
/// A timestamp that represents the last time the contents of the file represented by this entry were modified.
/// </summary>
/// <remarks>In Unix platforms, this timestamp is commonly known as <c>mtime</c>.</remarks>
/// <exception cref="ArgumentOutOfRangeException">The specified value is larger than <see cref="DateTimeOffset.UnixEpoch"/>.</exception>
/// <exception cref="ArgumentOutOfRangeException">The specified value is earlier than <see cref="DateTimeOffset.UnixEpoch"/> when using <see cref="TarEntryFormat.V7"/> or <see cref="TarEntryFormat.Ustar"/>.</exception>
public DateTimeOffset ModificationTime
{
get => _header._mTime;
tmds marked this conversation as resolved.
Show resolved Hide resolved
set
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
tmds marked this conversation as resolved.
Show resolved Hide resolved
if (FormatIsOctalOnly)
{
ArgumentOutOfRangeException.ThrowIfLessThan(value, DateTimeOffset.UnixEpoch);
}
_header._mTime = value;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
return null;
}

long size = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
Debug.Assert(size <= TarHelpers.MaxSizeLength, "size exceeded the max value possible with 11 octal digits. Actual size " + size);
long size = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.Size, FieldLengths.Size));
if (size < 0)
{
throw new InvalidDataException(SR.Format(SR.TarSizeFieldNegative));
Expand All @@ -384,14 +383,14 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
// Continue with the rest of the fields that require no special checks
TarHeader header = new(initialFormat,
name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
mode: (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch((long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
mode: TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
mTime: TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime))),
typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag])
{
_checksum = checksum,
_size = size,
_uid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
_uid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
_gid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
_linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
};

Expand Down Expand Up @@ -524,10 +523,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
if (_typeFlag is TarEntryType.CharacterDevice or TarEntryType.BlockDevice)
{
// Major number for a character device or block device entry.
_devMajor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
_devMajor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));

// Minor number for a character device or block device entry.
_devMinor = (int)TarHelpers.ParseOctal<uint>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
_devMinor = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
}
}

Expand All @@ -536,10 +535,10 @@ private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
private void ReadGnuAttributes(Span<byte> buffer)
{
// Convert byte arrays
long aTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
long aTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.ATime, FieldLengths.ATime));
_aTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(aTime);

long cTime = (long)TarHelpers.ParseOctal<ulong>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
long cTime = TarHelpers.ParseNumeric<long>(buffer.Slice(FieldLocations.CTime, FieldLengths.CTime));
_cTime = TarHelpers.GetDateTimeOffsetFromSecondsSinceEpoch(cTime);

// TODO: Read the bytes of the currently unsupported GNU fields, in case user wants to write this entry into another GNU archive, they need to be preserved. https://github.com/dotnet/runtime/issues/68230
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Buffers.Binary;
using System.Buffers.Text;
using System.Collections.Generic;
using System.Diagnostics;
Expand All @@ -15,6 +16,9 @@ namespace System.Formats.Tar
// Writes header attributes of a tar archive entry.
internal sealed partial class TarHeader
{
private const long Octal12ByteFieldMaxValue = (1L << (3 * 11)) - 1; // Max value of 11 octal digits.
private const int Octal8ByteFieldMaxValue = (1 << (3 * 7)) - 1; // Max value of 7 octal digits.
tmds marked this conversation as resolved.
Show resolved Hide resolved

private static ReadOnlySpan<byte> UstarMagicBytes => "ustar\0"u8;
private static ReadOnlySpan<byte> UstarVersionBytes => "00"u8;

Expand Down Expand Up @@ -606,35 +610,22 @@ private int WriteCommonFields(Span<byte> buffer, TarEntryType actualEntryType)

if (_mode > 0)
{
checksum += FormatOctal(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
checksum += FormatNumeric(_mode, buffer.Slice(FieldLocations.Mode, FieldLengths.Mode));
}

if (_uid > 0)
{
checksum += FormatOctal(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
checksum += FormatNumeric(_uid, buffer.Slice(FieldLocations.Uid, FieldLengths.Uid));
}

if (_gid > 0)
{
checksum += FormatOctal(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
checksum += FormatNumeric(_gid, buffer.Slice(FieldLocations.Gid, FieldLengths.Gid));
}

if (_size > 0)
{
if (_size <= TarHelpers.MaxSizeLength)
{
checksum += FormatOctal(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
}
else if (_format is not TarEntryFormat.Pax)
{
throw new ArgumentException(SR.Format(SR.TarSizeFieldTooLargeForEntryFormat, _format));
}
else
{
// No writing, just verifications
Debug.Assert(_typeFlag is not TarEntryType.ExtendedAttributes and not TarEntryType.GlobalExtendedAttributes);
Debug.Assert(Convert.ToInt64(ExtendedAttributes[PaxEaSize]) > TarHelpers.MaxSizeLength);
}
checksum += FormatNumeric(_size, buffer.Slice(FieldLocations.Size, FieldLengths.Size));
}

checksum += WriteAsTimestamp(_mTime, buffer.Slice(FieldLocations.MTime, FieldLengths.MTime));
Expand Down Expand Up @@ -739,12 +730,12 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)

if (_devMajor > 0)
{
checksum += FormatOctal(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
checksum += FormatNumeric(_devMajor, buffer.Slice(FieldLocations.DevMajor, FieldLengths.DevMajor));
}

if (_devMinor > 0)
{
checksum += FormatOctal(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
checksum += FormatNumeric(_devMinor, buffer.Slice(FieldLocations.DevMinor, FieldLengths.DevMinor));
}

return checksum;
Expand Down Expand Up @@ -916,7 +907,7 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
ExtendedAttributes[PaxEaLinkName] = _linkName;
}

if (_size > TarHelpers.MaxSizeLength)
if (_size > Octal12ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaSize] = _size.ToString();
}
Expand All @@ -925,6 +916,42 @@ private void CollectExtendedAttributesFromStandardFieldsIfNeeded()
ExtendedAttributes.Remove(PaxEaSize);
}

if (_uid > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaUid] = _uid.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaUid);
}

if (_gid > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaGid] = _gid.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaGid);
}

if (_devMajor > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaDevMajor] = _devMajor.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaDevMajor);
}

if (_devMinor > Octal8ByteFieldMaxValue)
{
ExtendedAttributes[PaxEaDevMinor] = _devMinor.ToString();
}
else
{
ExtendedAttributes.Remove(PaxEaDevMinor);
}

// Sets the specified string to the dictionary if it's longer than the specified max byte length; otherwise, remove it.
static void TryAddStringField(Dictionary<string, string> extendedAttributes, string key, string? value, int maxLength)
{
Expand Down Expand Up @@ -1022,6 +1049,56 @@ private static int Checksum(ReadOnlySpan<byte> bytes)
return checksum;
}

// Writes an int into an 8-byte numeric header field and returns the checksum of the written bytes.
// Values that fit in 7 octal digits (and every value when the format is Pax) are handed to FormatOctal.
// Any other value is written in the GNU binary representation when the format is Gnu;
// for the remaining formats an ArgumentException is thrown.
private int FormatNumeric(int value, Span<byte> destination)
{
    Debug.Assert(destination.Length == 8, "8 byte field expected.");

    bool fitsInOctalField = value is >= 0 and <= Octal8ByteFieldMaxValue;
    if (fitsInOctalField || _format == TarEntryFormat.Pax)
    {
        return FormatOctal(value, destination);
    }

    if (_format != TarEntryFormat.Gnu)
    {
        throw new ArgumentException(SR.Format(SR.TarFieldTooLargeForEntryFormat, _format));
    }

    // GNU format: the field holds a big endian number.
    // Sign-extending to 64 bits and forcing the top bit on gives a leading '0xff' byte
    // for negative numbers and a leading '0x80' byte for positive numbers.
    long encoded = (long)value | long.MinValue;
    BinaryPrimitives.WriteInt64BigEndian(destination, encoded);
    return Checksum(destination);
}

// Writes a long into a 12-byte numeric header field and returns the checksum of the written bytes.
// Values that fit in 11 octal digits (and every value when the format is Pax) are handed to FormatOctal.
// Any other value is written in the GNU binary representation when the format is Gnu;
// for the remaining formats an ArgumentException is thrown.
private int FormatNumeric(long value, Span<byte> destination)
{
    Debug.Assert(destination.Length == 12, "12 byte field expected.");

    if (_format == TarEntryFormat.Pax || value is >= 0 and <= Octal12ByteFieldMaxValue)
    {
        return FormatOctal(value, destination);
    }

    if (_format != TarEntryFormat.Gnu)
    {
        throw new ArgumentException(SR.Format(SR.TarFieldTooLargeForEntryFormat, _format));
    }

    // GNU format: the first 4 bytes are a marker whose leading byte is '0xff' for negative
    // numbers and '0x80' for positive numbers; the remaining 8 bytes hold the big endian value.
    const int MarkerLength = 4;
    uint marker = value < 0 ? 0xffffffff : 0x80000000;
    BinaryPrimitives.WriteUInt32BigEndian(destination, marker);
    BinaryPrimitives.WriteInt64BigEndian(destination.Slice(MarkerLength), value);
    return Checksum(destination);
}

// Writes the specified decimal number as a right-aligned octal number and returns its checksum.
private static int FormatOctal(long value, Span<byte> destination)
{
Expand All @@ -1040,11 +1117,11 @@ private static int FormatOctal(long value, Span<byte> destination)
return WriteRightAlignedBytesAndGetChecksum(digits.Slice(i), destination);
}

// Writes the specified DateTimeOffset's Unix time seconds as a right-aligned octal number, and returns its checksum.
private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
// Writes the specified DateTimeOffset's Unix time seconds, and returns its checksum.
private int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destination)
{
long unixTimeSeconds = timestamp.ToUnixTimeSeconds();
return FormatOctal(unixTimeSeconds, destination);
return FormatNumeric(unixTimeSeconds, destination);
}

// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ internal static partial class TarHelpers
{
internal const short RecordSize = 512;
internal const int MaxBufferLength = 4096;
internal const long MaxSizeLength = (1L << 33) - 1; // Max value of 11 octal digits = 2^33 - 1 or 8 Gb.

internal const UnixFileMode ValidUnixFileModes =
UnixFileMode.UserRead |
Expand Down Expand Up @@ -215,6 +214,29 @@ internal static TarEntryType GetCorrectTypeFlagForFormat(TarEntryFormat format,
return entryType;
}

/// <summary>Parses a numeric field.</summary>
internal static T ParseNumeric<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>, IBinaryInteger<T>
{
// The tar standard specifies that numeric fields are stored using an octal representation.
// This limits the range of values that can be stored in the fields.
// To increase the supported range, a GNU extension defines that when the leading byte is
// '0xff'/'0x80' the remaining bytes are a negative/positive big-endian formatted value.
tmds marked this conversation as resolved.
Show resolved Hide resolved
// Like the 'tar' tool we are permissive when encountering this representation in non GNU formats.
byte leadingByte = buffer[0];
if (leadingByte == 0xff)
{
return T.ReadBigEndian(buffer, isUnsigned: false);
}
else if (leadingByte == 0x80)
{
return T.ReadBigEndian(buffer.Slice(1), isUnsigned: true);
}
else
{
return ParseOctal<T>(buffer);
}
}

/// <summary>Parses a byte span that represents an ASCII string containing a number in octal base.</summary>
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
{
Expand Down
Loading
Loading