Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[#2196] Use git blame line info for aggregate blame author modified and date info #2232

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0f8de06
Replace string wrangling with blameLine
logical-1985516 Jun 30, 2024
8c74f33
Fix lineNumber argument for blameLine and commitDate
logical-1985516 Jul 3, 2024
cd69a96
Improve variable name, add comments and replace magic numbers
logical-1985516 Jul 3, 2024
f8ddc62
Replace commitHash in blameLine with "" to return all blame lines
logical-1985516 Jul 6, 2024
72b1f89
Edit commitHash to now be returned from blameLineInfo for abstraction
logical-1985516 Jul 6, 2024
92ccb77
Add variable commitDateInMs for better readability
logical-1985516 Jul 6, 2024
c0c78a6
Rename method to be in seconds and change to commit-time in blameLine
logical-1985516 Jul 6, 2024
5073ecf
Remove unused constant COMMIT_TIME_OFFSET
logical-1985516 Jul 6, 2024
310856e
Remove unused constants
logical-1985516 Jul 6, 2024
5b6caf0
Change milliseconds to seconds
logical-1985516 Jul 6, 2024
b869ba1
Merge branch 'master' into 2196-use-GitBlameLineInfo-for-aggregateBla…
logical-1985516 Jul 14, 2024
22473ba
Add and use blameFile that returns a list of GitBlameLineInfo
logical-1985516 Jul 27, 2024
ed17dc8
Add timeOption in processGitBlameResultLine and clean up statements
logical-1985516 Jul 27, 2024
c8115e2
Merge branch '2196-use-GitBlameLineInfo-for-aggregateBlameAuthorModif…
logical-1985516 Jul 27, 2024
37e8f6b
Merge branch 'master' into 2196-use-GitBlameLineInfo-for-aggregateBla…
logical-1985516 Jul 27, 2024
c6e4015
Merge branch 'master' into 2196-use-GitBlameLineInfo-for-aggregateBla…
logical-1985516 Jul 28, 2024
eab3489
Merge branch 'master' into 2196-use-GitBlameLineInfo-for-aggregateBla…
gok99 Aug 4, 2024
cecb1b2
Change processGitBlameResultLine to take in String[] for compatibility
logical-1985516 Aug 10, 2024
96eaf57
Update blameFile to perform blame; abstract out processing in blameFile
logical-1985516 Aug 10, 2024
82bb6f7
Merge branch '2196-use-GitBlameLineInfo-for-aggregateBlameAuthorModif…
logical-1985516 Aug 10, 2024
d1a3cf8
Merge branch 'master' into 2196-use-GitBlameLineInfo-for-aggregateBla…
logical-1985516 Aug 10, 2024
2ab32d3
Use getGitBlameFileResult in FileInfoAnalyzer for abstraction
logical-1985516 Aug 10, 2024
f843539
Merge branch '2196-use-GitBlameLineInfo-for-aggregateBlameAuthorModif…
logical-1985516 Aug 10, 2024
972f0e2
Merge branch 'master' into 2196-use-GitBlameLineInfo-for-aggregateBla…
gok99 Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 20 additions & 43 deletions src/main/java/reposense/authorship/FileInfoAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,19 @@
import reposense.authorship.model.LineInfo;
import reposense.git.GitBlame;
import reposense.git.GitLog;
import reposense.git.model.GitBlameLineInfo;
import reposense.model.Author;
import reposense.model.CommitHash;
import reposense.model.RepoConfiguration;
import reposense.system.LogsManager;
import reposense.util.FileUtil;
import reposense.util.StringsUtil;

/**
* Analyzes the target and information given in the {@link FileInfo}.
*/
public class FileInfoAnalyzer {
private static final Logger logger = LogsManager.getLogger(FileInfoAnalyzer.class);

private static final int AUTHOR_NAME_OFFSET = "author ".length();
private static final int AUTHOR_EMAIL_OFFSET = "author-mail ".length();
private static final int AUTHOR_TIME_OFFSET = "author-time ".length();
private static final int AUTHOR_TIMEZONE_OFFSET = "author-tz ".length();
private static final int FULL_COMMIT_HASH_LENGTH = 40;

private static final String MESSAGE_FILE_MISSING = "Unable to analyze the file located at \"%s\" "
+ "as the file is missing from your system. Skipping this file.";

Expand Down Expand Up @@ -148,31 +142,21 @@ private FileResult generateBinaryFileResult(RepoConfiguration config, FileInfo f
*/
private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, FileInfo fileInfo,
boolean shouldAnalyzeAuthorship, double originalityThreshold) {
String blameResults;

if (!config.isFindingPreviousAuthorsPerformed()) {
blameResults = getGitBlameResult(config, fileInfo.getPath());
} else {
blameResults = getGitBlameWithPreviousAuthorsResult(config, fileInfo.getPath());
}
List<GitBlameLineInfo> gitBlameLineInfos = getGitBlameFileResult(config, fileInfo.getPath(),
config.isFindingPreviousAuthorsPerformed());

String[] blameResultLines = StringsUtil.NEWLINE.split(blameResults);
Path filePath = Paths.get(fileInfo.getPath());
LocalDateTime sinceDate = config.getSinceDate();
LocalDateTime untilDate = config.getUntilDate();

for (int lineCount = 0; lineCount < blameResultLines.length; lineCount += 5) {
String commitHash = blameResultLines[lineCount].substring(0, FULL_COMMIT_HASH_LENGTH);
String authorName = blameResultLines[lineCount + 1].substring(AUTHOR_NAME_OFFSET);
String authorEmail = blameResultLines[lineCount + 2]
.substring(AUTHOR_EMAIL_OFFSET).replaceAll("<|>", "");
long commitDateInMs = Long.parseLong(blameResultLines[lineCount + 3].substring(AUTHOR_TIME_OFFSET)) * 1000;
LocalDateTime commitDate = LocalDateTime.ofInstant(Instant.ofEpochMilli(commitDateInMs),
config.getZoneId());
Author author = config.getAuthor(authorName, authorEmail);

int lineNumber = lineCount / 5;
if (!fileInfo.isFileLineTracked(lineNumber) || author.isIgnoringFile(filePath)
for (int lineCount = 0; lineCount < gitBlameLineInfos.size(); lineCount++) {
GitBlameLineInfo blameLineInfo = gitBlameLineInfos.get(lineCount);
String commitHash = blameLineInfo.getCommitHash();
LocalDateTime commitDate = LocalDateTime.ofInstant(
Instant.ofEpochSecond(blameLineInfo.getTimestampInSeconds()), config.getZoneId());
Author author = config.getAuthor(blameLineInfo.getAuthorName(), blameLineInfo.getAuthorEmail());

if (!fileInfo.isFileLineTracked(lineCount) || author.isIgnoringFile(filePath)
|| CommitHash.isInsideCommitList(commitHash, config.getIgnoreCommitList())
|| commitDate.isBefore(sinceDate) || commitDate.isAfter(untilDate)) {
author = Author.UNKNOWN_AUTHOR;
Expand All @@ -184,32 +168,25 @@ private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, F
MESSAGE_SHALLOW_CLONING_LAST_MODIFIED_DATE_CONFLICT, config.getRepoName()));
}

fileInfo.setLineLastModifiedDate(lineNumber, commitDate);
fileInfo.setLineLastModifiedDate(lineCount, commitDate);
}
fileInfo.setLineAuthor(lineNumber, author);
fileInfo.setLineAuthor(lineCount, author);

if (shouldAnalyzeAuthorship && !author.equals(Author.UNKNOWN_AUTHOR)) {
String lineContent = fileInfo.getLine(lineNumber + 1).getContent();
String lineContent = fileInfo.getLine(lineCount + 1).getContent();
boolean isFullCredit = AuthorshipAnalyzer.analyzeAuthorship(config, fileInfo.getPath(), lineContent,
commitHash, author, originalityThreshold);
fileInfo.setIsFullCredit(lineNumber, isFullCredit);
fileInfo.setIsFullCredit(lineCount, isFullCredit);
}
}
}

/**
* Returns the analysis result from running git blame on {@code filePath} with reference to the root directory
* given in {@code config}.
*/
private String getGitBlameResult(RepoConfiguration config, String filePath) {
return GitBlame.blame(config.getRepoRoot(), filePath);
}

/**
* Returns the analysis result from running git blame with finding previous authors enabled on {@code filePath}
* with reference to the root directory given in {@code config}.
* Returns the per-line git blame result for {@code filePath}, run from the root directory given in
* {@code config}. {@code withPreviousAuthors} selects whether blame is run with finding previous authors enabled.
*/
private String getGitBlameWithPreviousAuthorsResult(RepoConfiguration config, String filePath) {
return GitBlame.blameWithPreviousAuthors(config.getRepoRoot(), filePath);
private List<GitBlameLineInfo> getGitBlameFileResult(RepoConfiguration config, String filePath,
boolean withPreviousAuthors) {
return GitBlame.blameFile(config.getRepoRoot(), filePath, withPreviousAuthors);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ public static boolean analyzeAuthorship(RepoConfiguration config, String filePat
GitBlameLineInfo deletedLineInfo = GitBlame.blameLine(config.getRepoRoot(), deletedLine.getGitBlameCommitHash(),
deletedLine.getFilePath(), deletedLine.getLineNumber());
Author previousAuthor = config.getAuthor(deletedLineInfo.getAuthorName(), deletedLineInfo.getAuthorEmail());
long sinceDateInMilliseconds = ZonedDateTime.of(config.getSinceDate(), config.getZoneId()).toEpochSecond();
long sinceDateInSeconds = ZonedDateTime.of(config.getSinceDate(), config.getZoneId()).toEpochSecond();

// Give full credit if author is unknown, is before since date, is in ignored list, or is an ignored file
if (previousAuthor.equals(Author.UNKNOWN_AUTHOR)
|| deletedLineInfo.getTimestampMilliseconds() < sinceDateInMilliseconds
|| deletedLineInfo.getTimestampInSeconds() < sinceDateInSeconds
|| CommitHash.isInsideCommitList(deletedLineInfo.getCommitHash(), config.getIgnoreCommitList())
|| previousAuthor.isIgnoringFile(Paths.get(deletedLine.getFilePath()))) {
return true;
Expand Down
53 changes: 45 additions & 8 deletions src/main/java/reposense/git/GitBlame.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add / update tests for functions / flags you've added here?

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import reposense.git.model.GitBlameLineInfo;
import reposense.util.StringsUtil;
Expand All @@ -16,6 +19,8 @@
public class GitBlame {
public static final String IGNORE_COMMIT_LIST_FILE_NAME = ".git-blame-ignore-revs";

private static final int BLAME_LINE_INFO_ROW_COUNT = 5;

private static final String COMMIT_HASH_REGEX = "(^[0-9a-f]{40} .*)";
private static final String AUTHOR_NAME_REGEX = "(^author .*)";
private static final String AUTHOR_EMAIL_REGEX = "(^author-mail .*)";
Expand All @@ -28,8 +33,9 @@ public class GitBlame {

private static final int AUTHOR_NAME_OFFSET = "author ".length();
private static final int AUTHOR_EMAIL_OFFSET = "author-mail ".length();
private static final int FULL_COMMIT_HASH_LENGTH = 40;
private static final int AUTHOR_TIME_OFFSET = "author-time ".length();
private static final int COMMIT_TIME_OFFSET = "committer-time ".length();
private static final int FULL_COMMIT_HASH_LENGTH = 40;

/**
* Returns the raw git blame result for the {@code fileDirectory}, performed at the {@code root} directory.
Expand Down Expand Up @@ -57,6 +63,17 @@ public static String blameWithPreviousAuthors(String root, String fileDirectory)
return StringsUtil.filterText(runCommand(rootPath, blameCommandWithFindingPreviousAuthors), COMBINATION_REGEX);
}

/**
 * Runs git blame on {@code fileDirectory} from the {@code root} directory and returns the parsed result.
 *
 * @param root Directory in which the blame command is performed.
 * @param fileDirectory File to blame.
 * @param withPreviousAuthors If {@code true}, the raw output is obtained from
 *     {@link #blameWithPreviousAuthors(String, String)}; otherwise from {@link #blame(String, String)}.
 * @return The list of {@code GitBlameLineInfo} parsed from the raw blame output.
 */
public static List<GitBlameLineInfo> blameFile(String root, String fileDirectory, boolean withPreviousAuthors) {
    String rawBlameOutput;
    if (withPreviousAuthors) {
        rawBlameOutput = blameWithPreviousAuthors(root, fileDirectory);
    } else {
        rawBlameOutput = blame(root, fileDirectory);
    }

    return processGitBlameResultLines(rawBlameOutput);
}

/**
* Returns the git blame result for {@code lineNumber} of {@code fileDirectory} at {@code commitHash}.
*/
Expand All @@ -68,21 +85,41 @@ public static GitBlameLineInfo blameLine(String root, String commitHash, String

String blameResult = StringsUtil.filterText(runCommand(rootPath, blameCommand),
COMBINATION_WITH_COMMIT_TIME_REGEX);
String[] blameResultLines = StringsUtil.NEWLINE.split(blameResult);
return processGitBlameResultLine(blameResultLines, "committer");
}

return processGitBlameResultLine(blameResult);
/**
 * Splits the filtered blame output {@code blameResults} into per-line records and parses each one.
 * The output is read in groups of {@code BLAME_LINE_INFO_ROW_COUNT} consecutive rows; the first
 * {@code BLAME_LINE_INFO_ROW_COUNT - 1} rows of every group are handed to
 * {@code processGitBlameResultLine} using the "author" time option, and the last row of the group is skipped.
 *
 * @param blameResults Filtered git blame output, rows separated by newlines.
 * @return One {@code GitBlameLineInfo} per group of rows, in the order they appear in {@code blameResults}.
 */
private static List<GitBlameLineInfo> processGitBlameResultLines(String blameResults) {
    String[] allRows = StringsUtil.NEWLINE.split(blameResults);
    List<GitBlameLineInfo> lineInfos = new ArrayList<>();

    int groupStart = 0;
    while (groupStart < allRows.length) {
        // The end index is exclusive, so the slice holds one row fewer than a full group.
        int sliceEnd = groupStart + BLAME_LINE_INFO_ROW_COUNT - 1;
        String[] groupRows = Arrays.copyOfRange(allRows, groupStart, sliceEnd);
        lineInfos.add(processGitBlameResultLine(groupRows, "author"));
        groupStart += BLAME_LINE_INFO_ROW_COUNT;
    }

    return lineInfos;
}

/**
* Returns the processed result of {@code blameResult}.
* Returns the processed result of {@code blameResultLines}, with reference to {@code timeOption}.
*/
private static GitBlameLineInfo processGitBlameResultLine(String blameResult) {
String[] blameResultLines = StringsUtil.NEWLINE.split(blameResult);
private static GitBlameLineInfo processGitBlameResultLine(String[] blameResultLines, String timeOption) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to use a boolean for timeOption since it doesn't seem like there are likely to be new options in git in the future.

assert timeOption.equals("author") || timeOption.equals("committer");

String commitHash = blameResultLines[0].substring(0, FULL_COMMIT_HASH_LENGTH);
String authorName = blameResultLines[1].substring(AUTHOR_NAME_OFFSET);
String authorEmail = blameResultLines[2].substring(AUTHOR_EMAIL_OFFSET).replaceAll("[<>]", "");
long timestampMilliseconds = Long.parseLong(blameResultLines[5].substring(COMMIT_TIME_OFFSET));

return new GitBlameLineInfo(commitHash, authorName, authorEmail, timestampMilliseconds);
long timestampInSeconds;
if (timeOption.equals("author")) {
timestampInSeconds = Long.parseLong(blameResultLines[3].substring(AUTHOR_TIME_OFFSET));
} else {
timestampInSeconds = Long.parseLong(blameResultLines[5].substring(COMMIT_TIME_OFFSET));
}

return new GitBlameLineInfo(commitHash, authorName, authorEmail, timestampInSeconds);
}
}
12 changes: 6 additions & 6 deletions src/main/java/reposense/git/model/GitBlameLineInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ public class GitBlameLineInfo {
private final String commitHash;
private final String authorName;
private final String authorEmail;
private final long timestampMilliseconds;
private final long timestampInSeconds;
gok99 marked this conversation as resolved.
Show resolved Hide resolved

public GitBlameLineInfo(String commitHash, String authorName, String authorEmail, long timestampMilliseconds) {
public GitBlameLineInfo(String commitHash, String authorName, String authorEmail, long timestampInSeconds) {
this.commitHash = commitHash;
this.authorName = authorName;
this.authorEmail = authorEmail;
this.timestampMilliseconds = timestampMilliseconds;
this.timestampInSeconds = timestampInSeconds;
}

public String getCommitHash() {
Expand All @@ -28,8 +28,8 @@ public String getAuthorEmail() {
return authorEmail;
}

public long getTimestampMilliseconds() {
return timestampMilliseconds;
public long getTimestampInSeconds() {
return timestampInSeconds;
}

@Override
Expand All @@ -46,6 +46,6 @@ public boolean equals(Object other) {
return commitHash.equals(otherLineInfo.commitHash)
&& authorName.equals(otherLineInfo.authorName)
&& authorEmail.equals(otherLineInfo.authorEmail)
&& timestampMilliseconds == otherLineInfo.timestampMilliseconds;
&& timestampInSeconds == otherLineInfo.timestampInSeconds;
}
}
Loading