Skip to content

Commit

Permalink
Expose the TarEntry start of the data stream (#105007)
Browse files Browse the repository at this point in the history
Add a public property to TarEntry that indicates the position, relative to the beginning of the archive stream, where the current entry's data begins.

---------

Co-authored-by: Stephen Toub <stoub@microsoft.com>
  • Loading branch information
carlossanlop and stephentoub committed Jul 22, 2024
1 parent cc96c13 commit 58e8c0c
Show file tree
Hide file tree
Showing 10 changed files with 1,373 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/libraries/System.Formats.Tar/ref/System.Formats.Tar.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ internal TarEntry() { }
public System.IO.UnixFileMode Mode { get { throw null; } set { } }
public System.DateTimeOffset ModificationTime { get { throw null; } set { } }
public string Name { get { throw null; } set { } }
public long DataOffset { get { throw null; } }
public int Uid { get { throw null; } set { } }
public void ExtractToFile(string destinationFileName, bool overwrite) { }
public System.Threading.Tasks.Task ExtractToFileAsync(string destinationFileName, bool overwrite, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { throw null; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,15 @@ public Stream? DataStream
}
}

/// <summary>
/// Gets the position within the archive stream at which this entry's data section starts.
/// </summary>
/// <value>
/// The offset of the first data byte, counted from the absolute start of the archive stream
/// rather than from where the tar archive begins; or -1 when the entry does not come from an
/// archive stream or the archive stream is not seekable.
/// </value>
public long DataOffset
{
    get { return _header._dataOffset; }
}

/// <summary>
/// A string that represents the current entry.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ internal sealed partial class TarHeader

archiveStream.ReadExactly(buffer);

TarHeader? header = TryReadAttributes(initialFormat, buffer);
TarHeader? header = TryReadAttributes(initialFormat, buffer, archiveStream);
if (header != null && processDataBlock)
{
header.ProcessDataBlock(archiveStream, copyData);
Expand All @@ -47,7 +47,7 @@ internal sealed partial class TarHeader

await archiveStream.ReadExactlyAsync(buffer, cancellationToken).ConfigureAwait(false);

TarHeader? header = TryReadAttributes(initialFormat, buffer.Span);
TarHeader? header = TryReadAttributes(initialFormat, buffer.Span, archiveStream);
if (header != null && processDataBlock)
{
await header.ProcessDataBlockAsync(archiveStream, copyData, cancellationToken).ConfigureAwait(false);
Expand All @@ -58,7 +58,7 @@ internal sealed partial class TarHeader
return header;
}

private static TarHeader? TryReadAttributes(TarEntryFormat initialFormat, ReadOnlySpan<byte> buffer)
private static TarHeader? TryReadAttributes(TarEntryFormat initialFormat, ReadOnlySpan<byte> buffer, Stream archiveStream)
{
// Confirms if v7 or pax, or tentatively selects ustar
TarHeader? header = TryReadCommonAttributes(buffer, initialFormat);
Expand Down Expand Up @@ -86,6 +86,8 @@ internal sealed partial class TarHeader
}
// In PAX, there is nothing to read in this section (empty space)
}
// Finished reading the header metadata, next byte belongs to the data section, save the position
SetDataOffset(header, archiveStream);
}
return header;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ private void WriteWithUnseekableDataStream(TarEntryFormat format, Stream destina
// We know the exact location where the data starts depending on the format
long dataStartPosition = headerStartPosition + dataLocation;

// Before writing, update the offset field now that the entry belongs to an archive
_dataOffset = dataStartPosition;

// Move to the data start location and write the data
destinationStream.Seek(dataLocation, SeekOrigin.Current);
_dataStream.CopyTo(destinationStream); // The data gets copied from the current position
Expand Down Expand Up @@ -132,6 +135,9 @@ private async Task WriteWithUnseekableDataStreamAsync(TarEntryFormat format, Str
// We know the exact location where the data starts depending on the format
long dataStartPosition = headerStartPosition + dataLocation;

// Before writing, update the offset field now that the entry belongs to an archive
_dataOffset = dataStartPosition;

// Move to the data start location and write the data
destinationStream.Seek(dataLocation, SeekOrigin.Current);
await _dataStream.CopyToAsync(destinationStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position
Expand Down Expand Up @@ -758,6 +764,9 @@ private int WriteGnuFields(Span<byte> buffer)
// Writes the current header's data stream into the archive stream.
// The data offset is captured first, then the data is copied, then WriteEmptyPadding runs.
// archiveStream: destination archive stream; its position at entry is taken as the start of the data section.
// dataStream: source of the entry's data; it is read from wherever it is currently positioned.
private void WriteData(Stream archiveStream, Stream dataStream)
{
// Before writing, update the offset field now that the entry belongs to an archive.
// This has to happen ahead of the copy, while the archive stream still points at the first data byte.
SetDataOffset(this, archiveStream);

dataStream.CopyTo(archiveStream); // The data gets copied from the current position
// NOTE(review): presumably pads the data out to the tar block boundary - confirm in WriteEmptyPadding.
WriteEmptyPadding(archiveStream);
}
Expand Down Expand Up @@ -798,6 +807,9 @@ private async Task WriteDataAsync(Stream archiveStream, Stream dataStream, Cance
{
cancellationToken.ThrowIfCancellationRequested();

// Before writing, update the offset field now that the entry belongs to an archive
SetDataOffset(this, archiveStream);

await dataStream.CopyToAsync(archiveStream, cancellationToken).ConfigureAwait(false); // The data gets copied from the current position

int paddingAfterData = TarHelpers.CalculatePadding(_size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ internal sealed partial class TarHeader
private const string PaxEaDevMinor = "devminor";

internal Stream? _dataStream;
internal long _dataOffset;

// Position in the stream where the data ends in this header.
internal long _endOfHeaderAndDataAndBlockAlignment;
Expand Down Expand Up @@ -95,6 +96,7 @@ internal TarHeader(TarEntryFormat format, string name = "", int mode = 0, DateTi
_typeFlag = typeFlag;
_magic = GetMagicForFormat(format);
_version = GetVersionForFormat(format);
_dataOffset = -1;
}

// Constructor called when creating an entry using the common fields from another entry.
Expand Down Expand Up @@ -149,5 +151,10 @@ internal void InitializeExtendedAttributesWithExisting(IEnumerable<KeyValuePair<
TarEntryFormat.Gnu => GnuVersion,
_ => string.Empty,
};

// Records on the given header where its data section starts within the archive stream.
// A seekable archive stream contributes its current position; for any other stream the
// offset is stored as -1.
private static void SetDataOffset(TarHeader header, Stream archiveStream)
{
    long dataStart = -1;
    if (archiveStream.CanSeek)
    {
        dataStart = archiveStream.Position;
    }

    header._dataOffset = dataStart;
}
}
}
Loading

0 comments on commit 58e8c0c

Please sign in to comment.