Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missing context regions for single-line #2616

Merged
merged 16 commits into from
Feb 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions src/Sarif/FileRegionsCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ private Region PopulateTextRegionProperties(NewLineIndex lineIndex, Region input
return region;
}

internal const int BIGSNIPPETLENGTH = 512;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BIGSNIPPETLENGTH

Note that I've lifted out these magic values to make them more explicit in test use, and to allow product changes to flow more naturally to test validation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we prefer put at the top of the file with other definitions

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, will do this in a follow-on change, thanks for the review!!

internal const int SMALLSNIPPETLENGTH = 128;

public Region ConstructMultilineContextSnippet(Region inputRegion, Uri uri, string fileText = null)
{
if (inputRegion?.IsBinaryRegion != false)
Expand All @@ -156,11 +159,10 @@ public Region ConstructMultilineContextSnippet(Region inputRegion, Uri uri, stri
return null;
}

const int bigSnippetLength = 512;
const int smallSnippetLength = 128;
fileText ??= newLineIndex.Text;

// Generating full inputRegion to prevent issues.
Region originalRegion = this.PopulateTextRegionProperties(inputRegion, uri, populateSnippet: true);
Region originalRegion = this.PopulateTextRegionProperties(inputRegion, uri, populateSnippet: true, fileText);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fileText

We were not consistent around passing this optional argument around. This likely would break certain in-memory analysis operations that aren't highly exercised/well-tested today.


int maxLineNumber = newLineIndex.MaximumLineNumber;

Expand All @@ -171,10 +173,10 @@ public Region ConstructMultilineContextSnippet(Region inputRegion, Uri uri, stri
};

// Generating multilineRegion with one line before and after.
Region multilineContextSnippet = this.PopulateTextRegionProperties(region, uri, populateSnippet: true);
Region multilineContextSnippet = this.PopulateTextRegionProperties(region, uri, populateSnippet: true, fileText);

if (originalRegion.CharLength <= multilineContextSnippet.CharLength &&
multilineContextSnippet.CharLength <= bigSnippetLength)
multilineContextSnippet.CharLength <= BIGSNIPPETLENGTH)
{
return multilineContextSnippet;
}
Expand All @@ -184,17 +186,15 @@ public Region ConstructMultilineContextSnippet(Region inputRegion, Uri uri, stri
region.EndColumn = 0;
region.StartLine = 0;
region.EndLine = 0;
region.CharOffset = originalRegion.CharOffset < smallSnippetLength
? 0
: originalRegion.CharOffset - smallSnippetLength;
region.CharOffset = Math.Max(0, originalRegion.CharOffset - SMALLSNIPPETLENGTH);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CharOffset

Any prepended context region must start at the beginning of the file, any value that precedes this is invalid.


region.CharLength = originalRegion.CharLength + region.CharOffset + smallSnippetLength < newLineIndex.Text.Length
? originalRegion.CharLength + smallSnippetLength + Math.Abs(region.CharOffset - originalRegion.CharOffset)
: newLineIndex.Text.Length - region.CharOffset;
region.CharLength =
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CharLength

Similarly, any context region that blows past the end of the actual file is invalid.

Math.Min(originalRegion.CharLength + region.CharOffset + SMALLSNIPPETLENGTH,
fileText.Length - region.CharOffset);

// Generating multineRegion with 128 characters to the left and right from the
// Generating multiline region with 128 characters to the left and right from the
// originalRegion if possible.
multilineContextSnippet = this.PopulateTextRegionProperties(region, uri, populateSnippet: true);
multilineContextSnippet = this.PopulateTextRegionProperties(region, uri, populateSnippet: true, fileText);

// We can't generate a contextRegion which is smaller than the original region.
Debug.Assert(originalRegion.CharLength <= multilineContextSnippet.CharLength);
Expand Down
135 changes: 130 additions & 5 deletions src/Test.UnitTests.Sarif/FileRegionsCacheTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.ComponentModel.DataAnnotations;
using System.Diagnostics;
using System.Runtime.Intrinsics.X86;
using System.Text;
using System.Threading.Tasks;

Expand Down Expand Up @@ -411,11 +413,8 @@ public void FileRegionsCache_PopulatesFromMissingFile()
[Fact]
public void FileRegionsCache_PopulatesUsingProvidedText()
{
var run = new Run();
var fileRegionsCache = new FileRegionsCache();

Uri uri = new Uri(@"c:\temp\DoesNotExist\" + Guid.NewGuid().ToString() + ".cpp");

string fileText = "12345\n56790\n";
int charOffset = 6;
int charLength = 1;
Expand Down Expand Up @@ -447,6 +446,129 @@ public void FileRegionsCache_PopulatesUsingProvidedText()
actual.ValueEquals(expected).Should().BeTrue();
}

[Fact]
public void FileRegionsCache_PopulatesContextRegions()
{
string sentinel = "baz";
char padding = 'a';

// The context region populating API has two special values, 128 characters
// (which are used to generate leading and trailing text in single-line
// text files) and a value of 512', which is intended to be a limit
// on the overall size of the snippet that's returned.

int bsl = FileRegionsCache.BIGSNIPPETLENGTH;
int ssl = FileRegionsCache.SMALLSNIPPETLENGTH;

var context = new StringBuilder();

// Prepend a newline (or not!) in front of every sentinel region.
foreach (string pr in new string[] { null, Environment.NewLine })
{
// Post-fix a newline (or not!) in front of every sentinel region.
foreach (string po in new string[] { null, Environment.NewLine })
{
string[] tests =
{
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 3)}",
$"{new string(padding, 3)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 3)}{pr}{sentinel}{po}{new string(padding, 3)}",
$"{new string(padding, ssl)}{pr}{sentinel}{po}{new string(padding, ssl)}",
$"{new string(padding, 1)}{pr}{sentinel}{po}{new string(padding, ssl)}",
$"{new string(padding, ssl)}{pr}{sentinel}{po}{new string(padding, 1)}",
$"{new string(padding, ssl)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, ssl)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, bsl)}{pr}{sentinel}{po}{new string(padding, bsl)}",
$"{new string(padding, 10)}{pr}{sentinel}{po}{new string(padding, bsl)}",
$"{new string(padding, bsl)}{pr}{sentinel}{po}{new string(padding, 10)}",
$"{new string(padding, bsl)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, bsl)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 3)}",
$"{new string(padding, 3)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 3)}{pr}{sentinel}{po}{new string(padding, 3)}",
$"{new string(padding, ssl)}{pr}{sentinel}{po}{new string(padding, ssl)}",
$"{new string(padding, 1)}{pr}{sentinel}{po}{new string(padding, ssl)}",
$"{new string(padding, ssl)}{pr}{sentinel}{po}{new string(padding, 1)}",
$"{new string(padding, ssl)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, ssl)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, bsl)}{pr}{sentinel}{po}{new string(padding, bsl)}",
$"{new string(padding, 10)}{pr}{sentinel}{po}{new string(padding, bsl)}",
$"{new string(padding, bsl)}{pr}{sentinel}{po}{new string(padding, 10)}",
$"{new string(padding, bsl)}{pr}{sentinel}{po}{new string(padding, 0)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, bsl)}",
$"{new string(padding, 0)}{pr}{sentinel}{po}{new string(padding, 0)}",
};

context.Clear();
int iteration = 0;

// DEBUGGING THESE TESTS: these tests do not accumulate all outputs and report
// them, instead they break on the first failure. A failure will report a
// message like so:
//
// Expected contextRegion.Snippet not to be <null> because 'baz' snippet exists
// (iteration 12, while processing char-based region type for value 'abaza').
//
// Observe the iteration value (in this case 12). Set a conditional breakpoint
// below when the iteration variable equals this value and you can debug the
// relevant failure.

foreach (string test in tests)
{
var cache = new FileRegionsCache();
var uri = new Uri(@$"c:\temp\DoesNotExist\{Guid.NewGuid()}.cpp");

int index = test.IndexOf(sentinel);

// The FileRegions code takes two discrete code paths depending on whether
// the input variable is char-offset based or uses the start line convention.
var regions = new Region[]
{
new Region
{
CharOffset = index,
CharLength = sentinel.Length,
},
new Region
{
StartLine = 1,
StartColumn = index + 1,
EndColumn = index + sentinel.Length + 1,
}
};

string charBased = "char-based";
string lineBased = "line-based";

foreach (Region region in regions)
{
context.Clear();
context.Append($"(iteration {iteration}, while processing {(region.StartLine == 1 ? lineBased : charBased)} region type for value '{test}')");

// First, we populate the region and text snippet for the actual test finding.
Region actual = cache.PopulateTextRegionProperties(region, uri, populateSnippet: true, test);

actual.Snippet.Should().NotBeNull($"'{sentinel}' snippet exists {context}");
actual.Snippet.Text?.Should().Be($"{sentinel}", $"region snippet did not match {context}");

// Now, we attempt to produce a context region.
Region contextRegion = cache.ConstructMultilineContextSnippet(actual, uri, test);
contextRegion.Snippet.Should().NotBeNull($"'{sentinel}' snippet exists {context}");
contextRegion.Snippet.Text.Contains(sentinel).Should().BeTrue($"context region should encapsulate finding {context}");
}

iteration++;
}
}
}
}


[Fact]
public void FileRegionsCache_PopulatesSpecExampleRegions()
{
Expand Down Expand Up @@ -601,8 +723,11 @@ public void FileRegionsCache_IncreasingToLeftAndRight()

Region multilineRegion = fileRegionsCache.ConstructMultilineContextSnippet(region, uri);

// 114 (charoffset) + 600 (charlength) + 128 (grabbing right content)
multilineRegion.CharLength.Should().Be(114 + 600 + 128);
// 114 (charoffset) + 600 (charlength) + (128 - prefixed length).

// The length of our prepended data;
int prefixed = Math.Max(multilineRegion.CharOffset - 128, 0);
multilineRegion.CharLength.Should().Be(prefixed + region.CharLength + (128 - prefixed));
}

[Fact]
Expand Down