From d82857fbebd1111bb16588a4223bb24a8dcd07de Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 13:50:17 +0100 Subject: [PATCH 01/38] Add commit-interaction data functionality This includes reading and storing the data as well as building author and artifact networks. Signed-off-by: Christian Hechtl Applied-by: Leo Sendelbach --- util-conf.R | 15 ++++++-- util-data.R | 66 ++++++++++++++++++++++++++++++++-- util-networks.R | 58 ++++++++++++++++++++++++++++++ util-read.R | 94 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 226 insertions(+), 7 deletions(-) diff --git a/util-conf.R b/util-conf.R index 0031771a..434fbf96 100644 --- a/util-conf.R +++ b/util-conf.R @@ -15,7 +15,7 @@ ## Copyright 2016 by Wolfgang Mauerer ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl -## Copyright 2020-2021 by Christian Hechtl +## Copyright 2020-2021, 2024 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017-2019 by Thomas Bock ## Copyright 2021, 2023-2024 by Thomas Bock @@ -468,6 +468,12 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 1 ), + commit.interactions = list( + default = FALSE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), custom.event.timestamps.file = list( default = NA, type = "character", @@ -629,6 +635,9 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, conf$datapath.synchronicity = private$get.results.folder(data, selection.process, casestudy, "synchronicity") ## store path to PaStA data conf$datapath.pasta = private$get.results.folder(data, selection.process, casestudy, "pasta") + ## store path to commit interaction data + conf$datapath.commit.interaction = + private$get.results.folder(data, selection.process, casestudy, tagging, subfolder = tagging) ## store path to gender data conf$datapath.gender = private$get.results.folder(data, selection.process, casestudy, "gender") ## store path to 
issue data @@ -781,7 +790,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, author.relation = list( default = "mail", type = "character", - allowed = c("mail", "cochange", "issue"), + allowed = c("mail", "cochange", "issue", "interaction"), allowed.number = Inf ), author.directed = list( @@ -812,7 +821,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, artifact.relation = list( default = "cochange", type = "character", - allowed = c("cochange", "callgraph", "mail", "issue"), + allowed = c("cochange", "callgraph", "mail", "issue", "interaction"), allowed.number = Inf ), artifact.directed = list( diff --git a/util-data.R b/util-data.R index e8c9ee4d..be3ae945 100644 --- a/util-data.R +++ b/util-data.R @@ -16,7 +16,7 @@ ## Copyright 2020-2021, 2023-2024 by Thomas Bock ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl -## Copyright 2020 by Christian Hechtl +## Copyright 2020, 2024 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017 by Ferdinand Frank ## Copyright 2018-2019 by Jakob Kronawitter @@ -162,6 +162,7 @@ ProjectData = R6::R6Class("ProjectData", commits = create.empty.commits.list(), # data.frame commits.unfiltered = create.empty.commits.list(), # data.frame commit.messages = create.empty.commit.message.list(), # data.frame + commit.interactions = create.empty.commit.interaction.list(), ## mails mails.unfiltered = create.empty.mails.list(), # data.frame mails = create.empty.mails.list(), # data.frame @@ -404,6 +405,24 @@ ProjectData = R6::R6Class("ProjectData", To clean this up you can call the function 'cleanup.commit.message.data()'.") } }, + + update.commit.interactions = function() { + if (!self$is.data.source.cached("commits.unfiltered")) { + self$get.commits() + } + + print(colnames(private$commit.interactions)) + commit.data.subset = data.frame(hash = private$commits.unfiltered$hash, author.name = private$commits.unfiltered$author.name) + commit.data.subset = 
commit.data.subset[!duplicated(commit.data.subset$hash),] + + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") + colnames(commit.interaction.data)[7] = "base.author" + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") + colnames(commit.interaction.data)[8] = "interacting.author" + + private$commit.interactions = commit.interaction.data + + }, ## * * Gender data -------------------------------------------------- #' Update the gender related fields of: \code{authors} @@ -1186,6 +1205,46 @@ ProjectData = R6::R6Class("ProjectData", } }, + #' Get the commit interaction data. If no data.path is given, the standard data.path + #' will be used. + #' + #' @param data.path an optional different data path to the commit-interaction data + #' + #' @return the commit-interaction data + get.commit.interactions = function(data.path = NULL) { + logging::loginfo("Getting commit interactions.") + + ## if the commit-interaction data have not yet been read do this + if (!self$is.data.source.cached("commit.interactions")) { + if(is.null(data.path)) { + commit.interaction.data = read.commit.interactions(self$get.data.path()) + } else { + commit.interaction.data = read.commit.interactions(data.path) + } + + ## cache the result + private$commit.interactions = commit.interaction.data + private$update.commit.interactions() + } + + return(private$commit.interactions) + }, + + #' Set the commit-interaction data to the new given data. + #' + #' @param data the new commit-interaction data + set.commit.interactions = function(data) { + logging::loginfo("Setting commit interactions data.") + + if (is.null(data)) { + data = create.empty.commit.interaction.list() + } + + ## set the actual data + private$commit.interactions = data + # browser() + }, + #' Get the synchronicity data. If it is not already stored in the ProjectData, this function triggers a read in #' from disk. 
#' @@ -1756,7 +1815,8 @@ ProjectData = R6::R6Class("ProjectData", "commit.messages" = "commit.messages", "synchronicity" = "synchronicity", "pasta" = "pasta", - "custom.event.timestamps" = "custom.event.timestamps" + "custom.event.timestamps" = "custom.event.timestamps", + "commit.interactions" = "commit.interactions" ) ) sources = self$get.cached.data.sources.internal(source.type) @@ -1788,7 +1848,7 @@ ProjectData = R6::R6Class("ProjectData", ## define the data sources unfiltered.data.sources = c("commits.unfiltered", "mails.unfiltered", "issues.unfiltered") additional.data.sources = c("authors", "commit.messages", "synchronicity", "pasta", - "gender", "custom.event.timestamps") + "gender", "custom.event.timestamps", "commit.interactions") main.data.sources = c("issues", "commits", "mails") ## set the right data sources to look for according to the argument diff --git a/util-networks.R b/util-networks.R index b02eab69..d949273e 100644 --- a/util-networks.R +++ b/util-networks.R @@ -14,6 +14,7 @@ ## Copyright 2016-2019 by Claus Hunsen ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl +## Copyright 2024 by Christian Hechtl ## Copyright 2017-2019 by Thomas Bock ## Copyright 2021, 2023-2024 by Thomas Bock ## Copyright 2018 by Barbara Eckl @@ -225,6 +226,34 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(author.net) }, + #' Build and get the author network with commit-interactions as the relation. 
+ #' + #' @return the commit-interaction author network + get.author.network.commit.interaction = function() { + ## get the authors that appear in the commit-interaction data as the vertices of the network + vertices = unique(c(private$proj.data$get.commit.interactions()$base.author, + private$proj.data$get.commit.interactions()$interacting.author)) + vertices = data.frame(name = vertices) + + ## get the commit-interaction data as the edge data of the network + edges = private$proj.data$get.commit.interactions() + ## set the authors as the 'to' and 'from' of the network + colnames(edges)[7] = "to" + colnames(edges)[8] = "from" + edges = edges[,c(7,8,1,2,3,4,5,6)] + colnames(edges)[3] = "hash" + author.net.data = list(vertices = vertices, edges = edges) + ## construct the network + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("author.directed"), + available.edge.attributes = list(hash = "character") + ) + return(author.net) + }, + #' Get the thread-based author relation as network. #' If it does not already exist build it first. #' @@ -345,6 +374,33 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, + #' Build and get the commit-interaction based artifact network. 
+ #' + #' @return the commit-interaction based artifact network + get.artifact.network.commit.interaction = function() { + ## get the files that appear in the commit-interaction data as the vertices of the network + vertices = unique(c(private$proj.data$get.commit.interactions()$base.file, private$proj.data$get.commit.interactions()$file)) + vertices = data.frame(name = vertices) + ## get the commit-interaction data as the edge data of the network + edges = private$proj.data$get.commit.interactions() + ## set the files as the 'to' and 'from' of the network + colnames(edges)[6] = "to" + colnames(edges)[4] = "from" + edges = edges[,c(6,4,1,2,3,5)] + colnames(edges)[3] = "hash" + author.net.data = list(vertices = vertices, edges = edges) + ## construct the network + author.net = construct.network.from.edge.list( + author.net.data[["vertices"]], + author.net.data[["edges"]], + network.conf = private$network.conf, + directed = private$network.conf$get.value("artifact.directed"), + available.edge.attributes = list(hash = "character") + ) + + return(author.net) + }, + #' Get the call-graph-based artifact network. #' If it does not already exist build it first. #' IMPORTANT: This only works for range-level analyses! 
@@ -743,6 +799,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network = switch( relation, cochange = private$get.author.network.cochange(), + interaction = private$get.author.network.commit.interaction(), mail = private$get.author.network.mail(), issue = private$get.author.network.issue(), stop(sprintf("The author relation '%s' does not exist.", rel)) @@ -810,6 +867,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$get.artifact.network.callgraph(), mail = private$get.artifact.network.mail(), issue = private$get.artifact.network.issue(), + interaction = private$get.artifact.network.commit.interaction(), stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) diff --git a/util-read.R b/util-read.R index 8cfe1a80..4793f876 100644 --- a/util-read.R +++ b/util-read.R @@ -14,7 +14,7 @@ ## Copyright 2016-2019 by Claus Hunsen ## Copyright 2017 by Raphael Nömmer ## Copyright 2017-2018 by Christian Hechtl -## Copyright 2020-2022 by Christian Hechtl +## Copyright 2020-2022, 2024 by Christian Hechtl ## Copyright 2017 by Felix Prasse ## Copyright 2017-2018 by Thomas Bock ## Copyright 2023-2024 by Thomas Bock @@ -42,6 +42,7 @@ requireNamespace("plyr") requireNamespace("digest") # for sha1 hashing of IDs requireNamespace("sqldf") # for SQL-selections on data.frames requireNamespace("data.table") # for faster data.frame processing +requireNamespace("yaml") # for reading commit interaction data ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper functions -------------------------------------------------------- @@ -843,6 +844,97 @@ create.empty.pasta.list = function() { return(create.empty.data.frame(PASTA.LIST.COLUMNS, PASTA.LIST.DATA.TYPES)) } +## * Commit interaction data ----------------------------------------------- + +## column names of a dataframe containing commit interaction data (see function \code{read.commit.interactions}) +COMMIT.INTERACTION.LIST.COLUMNS = c( + "base.hash", "function", 
"file", + "interacting.hash" +) + +## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' +COMMIT.INTERACTION.LIST.DATA.TYPES = c( + "character", "character", "character", + "character" +) + +#' Read and parse the commit-interaction data. This data is present in a `.yaml` file which +#' needs to be broken down. Within the yaml file, there are different lists in which each +#' commit (hash) gets mapped to all commits it interacts with and the file/function because of +#' which they interact. +#' +#' +#' @param data.path the path to the commit-interaction data +#' +#' @return the read and parsed commit-interaction data +read.commit.interactions = function(data.path = NULL) { + + file = file.path(data.path, "commit-interactions.yaml") + # file = file.path("/scratch/hechtl/htop-new", "c5b0ccb9f9.yaml") + + commit.interaction.base = try(yaml::read_yaml(file = file), silent = TRUE) + + ## handle the case that the list of commit-interactions is empty + if (inherits(commit.interaction.base, "try-error")) { + logging::logwarn("There are no commit-interactions available for the current environment.") + logging::logwarn("Datapath: %s", data.path) + + # return a dataframe with the correct columns but zero rows + return(create.empty.commit.interaction.list()) + } + + ## extract the top level list of the yaml file which is called 'result-map' + result.map = commit.interaction.base$`result-map` + + ## extract a mapping of functions to files to be able to determine what file the current interaction is + ## based on + file.name.map = fastmap::fastmap() + function.file.list = purrr::map(result.map, 2) + file.name.map$mset(.list = function.file.list) + list.names = names(result.map) + + ## build the result dataframe by iterating over the 'result-map' list + commit.interaction.data = data.table::setDF(data.table::rbindlist(parallel::mcmapply(result.map, list.names, + SIMPLIFY = FALSE, + FUN = function(current.interaction, + function.name) { + ## get 
all commits that interact with the current one + insts = current.interaction[[4]] + interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { + base.hash = current.inst[[1]][[3]] + interacting.hashes = current.inst[[2]] + interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { + ## if there is no function name in the current interaction we set the function name to 'GLOBAL' + ## as this is most likely code outside of functions, else we set the function name + if (!"function" %in% names(hash)) { + return(data.frame(func = "GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) + } else { + return(data.frame(func = hash[["function"]], commit.hash = hash[["commit"]], + file = file.name.map$get(hash[["function"]]))) + } + }))) + interacting.hashes.df$base.hash = base.hash + interacting.hashes.df$base.func = function.name + interacting.hashes.df$base.file = file.name.map$get(function.name) + return(interacting.hashes.df) + }))) + return(interactions) + }))) + + ## remove all duplicate entries from the resulting dataframe + commit.interaction.data = commit.interaction.data[!duplicated(commit.interaction.data), ] + return(commit.interaction.data) +} + +#' Create an empty dataframe which has the same shape as a dataframe containing commit interaction data. +#' The dataframe has the column names and column datatypes defined in \code{COMMIT.INTERACTION.LIST.COLUMNS} +#' and \code{COMMIT.INTERACTION.LIST.DATA.TYPES}, respectively. 
+#' +#' @return the empty dataframe +create.empty.commit.interaction.list = function() { + return (create.empty.data.frame(COMMIT.INTERACTION.LIST.COLUMNS, COMMIT.INTERACTION.LIST.DATA.TYPES)) +} + ## * Synchronicity data ---------------------------------------------------- ## column names of a dataframe containing synchronicity data (see function \code{read.synchronicity}) From b4fd2a29c9b5fd561b1106c6febb54a32b0085ab Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 13:56:48 +0100 Subject: [PATCH 02/38] Add functionality for equals function Also removed comments and browser statements, as well as added updating of commit-interaction data when commit data is changed if commit-interactions are configured Signed-off-by: Leo Sendelbach --- util-data.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/util-data.R b/util-data.R index be3ae945..a904096c 100644 --- a/util-data.R +++ b/util-data.R @@ -26,6 +26,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2022-2023 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -77,7 +78,8 @@ DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION = list( "synchronicity" = "get.synchronicity", "pasta" = "get.pasta", "gender" = "get.gender", - "custom.event.timestamps" = "get.custom.event.timestamps" + "custom.event.timestamps" = "get.custom.event.timestamps", + "commit.interactions" = "get.commit.interactions" ) #' Applies a function to list keys @@ -410,16 +412,15 @@ ProjectData = R6::R6Class("ProjectData", if (!self$is.data.source.cached("commits.unfiltered")) { self$get.commits() } - - print(colnames(private$commit.interactions)) - commit.data.subset = data.frame(hash = private$commits.unfiltered$hash, author.name = private$commits.unfiltered$author.name) - commit.data.subset = commit.data.subset[!duplicated(commit.data.subset$hash),] - + commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], author.name = private$commits.unfiltered[["author.name"]]) + commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") - colnames(commit.interaction.data)[7] = "base.author" - commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") - colnames(commit.interaction.data)[8] = "interacting.author" + colnames(commit.interaction.data)[[7]] = "base.author" + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") + colnames(commit.interaction.data)[[8]] = "interacting.author" + private$commit.interactions = commit.interaction.data }, @@ -1130,6 +1131,17 @@ ProjectData = R6::R6Class("ProjectData", } } + ## add commit interaction data if wanted + if (private$project.conf$get.value("commit.interactions")) { + if (!self$is.data.source.cached("commit.interactions")) { + ## get data (no assignment because we just want to trigger anything commit.interaction related) + 
self$get.commit.interactions() + } else { + ## update all commit.interaction-related data + private$update.commit.interactions() + } + } + ## sort by date private$commits.unfiltered = private$commits.unfiltered[order(private$commits.unfiltered[["date"]], decreasing = FALSE), ] @@ -1242,7 +1254,6 @@ ProjectData = R6::R6Class("ProjectData", ## set the actual data private$commit.interactions = data - # browser() }, #' Get the synchronicity data. If it is not already stored in the ProjectData, this function triggers a read in From b3394eec4b0f2556f184eecd4de332a422afcbb7 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:00:47 +0100 Subject: [PATCH 03/38] Remove outdated comment in 'util-read.R' outdated comment with local data path removed Signed-off-by: Leo Sendelbach --- util-read.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-read.R b/util-read.R index 4793f876..929af7b3 100644 --- a/util-read.R +++ b/util-read.R @@ -25,6 +25,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2022-2023 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
## Note: @@ -870,7 +871,6 @@ COMMIT.INTERACTION.LIST.DATA.TYPES = c( read.commit.interactions = function(data.path = NULL) { file = file.path(data.path, "commit-interactions.yaml") - # file = file.path("/scratch/hechtl/htop-new", "c5b0ccb9f9.yaml") commit.interaction.base = try(yaml::read_yaml(file = file), silent = TRUE) From eeba7e29932bc973513c963fb9e716e9230d570f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:03:06 +0100 Subject: [PATCH 04/38] Add test for new functionality of 'equals' Uses 'equals' function on Project Data with new commit-interactions Signed-off-by: Leo Sendelbach --- tests/test-data.R | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test-data.R b/tests/test-data.R index 9c6f4f8c..c7730692 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -20,6 +20,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2023 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
@@ -511,3 +512,22 @@ test_that("Create RangeData objects from Codeface ranges and check data path", { expect_identical(range.paths, expected.paths, "RangeData data paths") }) + +test_that("Compare two ProjectData Objects with commit.interactions", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + + proj.data.one = ProjectData$new(project.conf = proj.conf) + proj.data.two = proj.data.one$clone(deep = TRUE) + + ## test if the project data is equal and the commit interactions are as well + expect_equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()) + expect_true(proj.data.one$equals(proj.data.two)) + + ## change commit interactions of one project data and assert that equality check fails + proj.data.two$set.commit.interactions(create.empty.commit.interaction.list()) + expect_false(proj.data.one$equals(proj.data.two)) +}) From 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:08:36 +0100 Subject: [PATCH 05/38] Add test for new read functionality new test reads commit-interactions data and asserts its correctness Signed-off-by: Leo Sendelbach --- tests/test-networks-author.R | 1 + tests/test-read.R | 26 ++++++++++++++++++++++++++ util-networks.R | 1 + 3 files changed, 28 insertions(+) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index d4d0e9fa..d29d74e0 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -22,6 +22,7 @@ ## Copyright 2018-2019 by Anselm Fehnker ## Copyright 2021 by Johannes Hostert ## Copyright 2023-2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. 
diff --git a/tests/test-read.R b/tests/test-read.R index db3645d4..3cc0faff 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -22,6 +22,7 @@ ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann ## Copyright 2022-2024 by Maximilian Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. @@ -497,3 +498,28 @@ test_that("Read and parse the issue data.", { expect_identical(issue.data.read.github, issue.data.expected.github, info = "Issue data github.") }) +test_that("Read the commit-interactions data.", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + + ## read the actual data + commit.interactions.data.read = read.commit.interactions(proj.conf$get.value("datapath")) + ## build the expected data.frame + commit.interactions.data.expected = data.frame(func = c("test.c", "test2.c", "test2.c", "test2.c"), + commit.hash = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", + "0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("test.c", "test2.c", "test2.c", "test2.c"), + base.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test.c", "test2.c", "test2.c", "test2.c"), + base.file = c("test.c", "test2.c", "test2.c", "test2.c")) + + ## check the results + expect_identical(commit.interactions.data.read, commit.interactions.data.expected, + info = "commit interaction data.") +}) \ No newline at end of file diff --git a/util-networks.R b/util-networks.R index d949273e..f7f59b88 100644 --- a/util-networks.R +++ b/util-networks.R @@ -23,6 +23,7 @@ ## Copyright 2021 by Niklas Schneider ## Copyright 2022 by Jonathan Baumann ## Copyright 2023-2024 by Maximilian 
Löffler +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved. From 54b6f655248720436af116fe72521f9cb0348429 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Mon, 12 Feb 2024 14:10:06 +0100 Subject: [PATCH 06/38] Add test data files with commit interactions An empty file and an example file with four interactions Signed-off-by: Leo Sendelbach --- .../proximity/commit-interactions.yaml | 0 .../proximity/commit-interactions.yaml | 59 +++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/codeface-data/results/testing/test_empty_proximity/proximity/commit-interactions.yaml create mode 100644 tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml diff --git a/tests/codeface-data/results/testing/test_empty_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_empty_proximity/proximity/commit-interactions.yaml new file mode 100644 index 00000000..e69de29b diff --git a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml new file mode 100644 index 00000000..e424236d --- /dev/null +++ b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml @@ -0,0 +1,59 @@ +scope: REGION +result-map: + test.c: + demangled-name: test.c + file: test.c + num-instructions: 30 + insts: + - base-hash: + region: 45620620587549 + function: test.c + commit: 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 + repository: test-repo + interacting-hashes: + - region: 87546092348456 + function: test.c + commit: 5a5ec9675e98187e1e92561e1888aa6f04faa338 + repository: test-repo + amount: 2 + callees: + - test_callee + commits: + - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af + repository: test-repo + test2.c: + demangled-name: test2.c + file: test2.c + num-instructions: 26 + insts: + - base-hash: + region: 50956672345141 + function: test2.c + commit: 
3a0ed78458b3976243db6829f63eba3eead26774 + repository: test-repo + interacting-hashes: + - region: 98750276234511 + function: test2.c + commit: 0a1a5c523d835459c42f33e863623138555e2526 + repository: test-repo + amount: 1 + - base-hash: + region: 67230588834344 + function: test2.c + commit: 0a1a5c523d835459c42f33e863623138555e2526 + repository: test-repo + interacting-hashes: + - region: 33295067820043 + function: test2.c + commit: 418d1dc4929ad1df251d2aeb833dd45757b04a6f + repository: test-repo + - region: 20194653678423 + function: test2.c + commit: d01921773fae4bed8186b0aa411d6a2f7a6626e6 + repository: test-repo + amount: 3 + callees: + - test_callee + commits: + - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af + repository: test-repo \ No newline at end of file From 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 14 Feb 2024 17:02:21 +0100 Subject: [PATCH 07/38] Add test for reading empty commit-interactions data Checks that the empty dataframe has correct col and rownames Signed-off-by: Leo Sendelbach --- tests/test-read.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test-read.R b/tests/test-read.R index 3cc0faff..f6d77199 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -522,4 +522,23 @@ test_that("Read the commit-interactions data.", { ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") +}) + +test_that("Read the empty commit-interactions data.", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + + ## read the actual data + commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ + test_empty_proximity/proximity") + ## build the expected data.frame + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 
4)) + colnames(commit.interactions.data.expected) = c("base.hash", "function", "file", "interacting.hash") + for(i in seq_len(4)) { + commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) + } + ## check the results + expect_identical(commit.interactions.data.read, commit.interactions.data.expected, + info = "commit interaction data.") }) \ No newline at end of file From 7b8585f87675795822c07230192d6454de31dcc7 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 14 Feb 2024 17:24:37 +0100 Subject: [PATCH 08/38] Add test for change in set.commits Test that commit-interactions are updated when they are configured and commit data is changed Signed-off-by: Leo Sendelbach --- tests/test-data.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/test-data.R b/tests/test-data.R index c7730692..69e56f18 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -530,4 +530,15 @@ test_that("Compare two ProjectData Objects with commit.interactions", { ## change commit interactions of one project data and assert that equality check fails proj.data.two$set.commit.interactions(create.empty.commit.interaction.list()) expect_false(proj.data.one$equals(proj.data.two)) + + ## change commit data in one to test if commit-interactions are correctly updated + ## call get.commit.interactions() once to restore read interactions + proj.data.two$get.commit.interactions() + + ## change commits in one project data + commit.data = proj.data.one$get.commits() + commit.data[["hash"]][[5]] = 1 + proj.data.one$set.commits(commit.data) + ## inequality? 
+ expect_equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()) }) From d7dc713ee1cc1b9bd0b8e74967c1028805a1b1e4 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 13:08:24 +0100 Subject: [PATCH 09/38] Add comments for update.commit.interactions Also added some linebreaks Signed-off-by: Leo Sendelbach --- util-data.R | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/util-data.R b/util-data.R index a904096c..f566fb94 100644 --- a/util-data.R +++ b/util-data.R @@ -408,19 +408,30 @@ ProjectData = R6::R6Class("ProjectData", } }, + ## * * Commit Interaction data -------------------------------------------------- + + #' Update the commit-interactions + #' + #' This method should be called whenever the field \code{commit.interactions} is changed. update.commit.interactions = function() { if (!self$is.data.source.cached("commits.unfiltered")) { self$get.commits() } - commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], author.name = private$commits.unfiltered[["author.name"]]) + + ## get relevant data from commits + commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], + author.name = private$commits.unfiltered[["author.name"]]) commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] - - commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") + + ## merge commit interactions with commits and change colnames to avoid duplicates + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, + by.x = "base.hash", by.y = "hash") colnames(commit.interaction.data)[[7]] = "base.author" - commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, + by.x = "commit.hash", by.y = "hash") 
colnames(commit.interaction.data)[[8]] = "interacting.author" - + private$commit.interactions = commit.interaction.data }, From f25632c6c331f6d2c5c6ff1d9fadf369fd2d8acd Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 13:28:16 +0100 Subject: [PATCH 10/38] Change indexes for 'match' calls in 'update.commit.interactions' Signed-off-by: Leo Sendelbach --- util-data.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/util-data.R b/util-data.R index f566fb94..c1aeac6f 100644 --- a/util-data.R +++ b/util-data.R @@ -426,11 +426,15 @@ ProjectData = R6::R6Class("ProjectData", ## merge commit interactions with commits and change colnames to avoid duplicates commit.interaction.data = merge(private$commit.interactions, commit.data.subset, by.x = "base.hash", by.y = "hash") - colnames(commit.interaction.data)[[7]] = "base.author" + + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "base.author" commit.interaction.data = merge(commit.interaction.data, commit.data.subset, by.x = "commit.hash", by.y = "hash") - colnames(commit.interaction.data)[[8]] = "interacting.author" + + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "interacting.author" private$commit.interactions = commit.interaction.data From 8fcc6d5b3fcaf9c0a3ff45faf6375e437810146f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 14:46:54 +0100 Subject: [PATCH 11/38] Fix test to correctly check for inequality using 'expect_false(isTRUE(all.equal(x, y)))' Signed-off-by: Leo Sendelbach --- tests/test-data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 69e56f18..3d87a918 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -539,6 +539,6 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.data = 
proj.data.one$get.commits() commit.data[["hash"]][[5]] = 1 proj.data.one$set.commits(commit.data) - ## inequality? - expect_equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()) + + expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) }) From 9117be811e20c9900136d21733e4552fdff05b48 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 14:57:43 +0100 Subject: [PATCH 12/38] Change colnames used for empty commit-interactions Also change the test to reflect this change Signed-off-by: Leo Sendelbach --- tests/test-read.R | 7 ++++--- util-read.R | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test-read.R b/tests/test-read.R index f6d77199..2cd0df6b 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -533,9 +533,10 @@ test_that("Read the empty commit-interactions data.", { commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ test_empty_proximity/proximity") ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 4)) - colnames(commit.interactions.data.expected) = c("base.hash", "function", "file", "interacting.hash") - for(i in seq_len(4)) { + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 6)) + colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", + "base.hash", "base.func", "base.file") + for(i in seq_len(6)) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## check the results diff --git a/util-read.R b/util-read.R index 929af7b3..22206f35 100644 --- a/util-read.R +++ b/util-read.R @@ -849,14 +849,14 @@ create.empty.pasta.list = function() { ## column names of a dataframe containing commit interaction data (see function \code{read.commit.interactions}) COMMIT.INTERACTION.LIST.COLUMNS = c( - "base.hash", "function", "file", - 
"interacting.hash" + "func", "commit.hash", "file", + "base.hash", "base.func", "base.file" ) ## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character", "character", - "character" + "character", "character", "character" ) #' Read and parse the commit-interaction data. This data is present in a `.yaml` file which From 49acd59d74b05d85ae63270b339ff5fd6cc90fc5 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 20 Feb 2024 16:25:04 +0100 Subject: [PATCH 13/38] Remove previously added columns to avoid duplication 'update.commit.interactions' no longer duplicates 'base.author' and 'interacting.author' columns Signed-off-by: Leo Sendelbach --- util-data.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/util-data.R b/util-data.R index c1aeac6f..8efee888 100644 --- a/util-data.R +++ b/util-data.R @@ -418,6 +418,16 @@ ProjectData = R6::R6Class("ProjectData", self$get.commits() } + ## remove existing columns named 'base.author' and 'interaction.author' + indices.to.remove = which("base.author" == colnames(private$commit.interactions)) + if (length(indices.to.remove)>0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } + indices.to.remove = which("interacting.author" == colnames(private$commit.interactions)) + if (length(indices.to.remove)>0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } + ## get relevant data from commits commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], author.name = private$commits.unfiltered[["author.name"]]) From 3efb38b398157fa2bdfd01f7356170609e6ab760 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 13:38:19 +0100 Subject: [PATCH 14/38] Change merge in 'update.commit.interactions' The merge now always keeps all commit interactions, even if there is no commit data for them (for example if the commits were made by 
deleted users). Also introduced a warning if that happens. Case is tested with new part in 'test-data.R' Signed-off-by: Leo Sendelbach --- tests/test-data.R | 33 ++++++++++++++++++++++++++++++++- util-data.R | 15 +++++++++++---- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 3d87a918..98116323 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -540,5 +540,36 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.data[["hash"]][[5]] = 1 proj.data.one$set.commits(commit.data) - expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) + expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), + proj.data.two$get.commit.interactions()))) + + ## set commit list of one project data to empty and test that last + ## two rows of result data frame are empty + proj.data.two$set.commits(create.empty.commits.list()) + + ## create empty data frame of correct size + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + ## assure that the correct type is used + for(i in seq_len(8)) { + commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) + } + ## set everything except for authors as expected + colnames(commit.interactions.data.expected) = c("commit.hash", "base.hash", "func", "file", + "base.func", "base.file", "base.author", + "interacting.author") + commit.interactions.data.expected[["commit.hash"]] = + c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6") + commit.interactions.data.expected[["base.hash"]] = + c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526") + 
commit.interactions.data.expected[["func"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["file"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test.c", "test2.c") + expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) }) diff --git a/util-data.R b/util-data.R index 8efee888..5903575c 100644 --- a/util-data.R +++ b/util-data.R @@ -420,11 +420,11 @@ ProjectData = R6::R6Class("ProjectData", ## remove existing columns named 'base.author' and 'interaction.author' indices.to.remove = which("base.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove)>0) { + if (length(indices.to.remove) > 0) { private$commit.interactions = private$commit.interactions[, -indices.to.remove] } indices.to.remove = which("interacting.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove)>0) { + if (length(indices.to.remove) > 0) { private$commit.interactions = private$commit.interactions[, -indices.to.remove] } @@ -435,17 +435,24 @@ ProjectData = R6::R6Class("ProjectData", ## merge commit interactions with commits and change colnames to avoid duplicates commit.interaction.data = merge(private$commit.interactions, commit.data.subset, - by.x = "base.hash", by.y = "hash") + by.x = "base.hash", by.y = "hash", all.x = TRUE) author.index = match("author.name", colnames(commit.interaction.data)) colnames(commit.interaction.data)[[author.index]] = "base.author" commit.interaction.data = merge(commit.interaction.data, commit.data.subset, - by.x = "commit.hash", by.y = "hash") + by.x = "commit.hash", by.y = "hash", all.x = TRUE) author.index = match("author.name", colnames(commit.interaction.data)) colnames(commit.interaction.data)[[author.index]] = "interacting.author" + ## warning if we have 
interactions without authors + if (anyNA(commit.interaction.data[["base.author"]]) || + anyNA(commit.interaction.data[["interacting.author"]])) { + logging::logwarn("There are authors in the commit-interactions that are not in the commit data! + This results in the commit-interactions having empty entries. + To clean up these entries, call cleanup.commit.interactions.") + } private$commit.interactions = commit.interaction.data }, From 099a096065fc20ebdc745e6d19c219a8fb9e5999 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 13:54:24 +0100 Subject: [PATCH 15/38] Add additional columns to commit-interactions columns 'base.author' and 'interacting.author' are initialized as NA and will be overwritten in 'update.commit.interactions'. Also, introduce check for correctness of returned data frame at the end of 'read.commit.interactions'. Signed-off-by: Leo Sendelbach --- util-read.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/util-read.R b/util-read.R index 22206f35..720a14ce 100644 --- a/util-read.R +++ b/util-read.R @@ -850,13 +850,15 @@ create.empty.pasta.list = function() { ## column names of a dataframe containing commit interaction data (see function \code{read.commit.interactions}) COMMIT.INTERACTION.LIST.COLUMNS = c( "func", "commit.hash", "file", - "base.hash", "base.func", "base.file" + "base.hash", "base.func", "base.file", + "base.author", "interacting.author" ) ## declare the datatype for each column in the constant 'COMMIT.INTERACTION.LIST.COLUMNS' COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character", "character", - "character", "character", "character" + "character", "character", "character", + "character", "character" ) #' Read and parse the commit-interaction data. 
This data is present in a `.yaml` file which @@ -918,11 +920,14 @@ read.commit.interactions = function(data.path = NULL) { interacting.hashes.df$base.file = file.name.map$get(function.name) return(interacting.hashes.df) }))) + interactions["base.author"] = NA_character_ + interactions["interacting.author"] = NA_character_ return(interactions) }))) ## remove all duplicate entries from the resulting dataframe commit.interaction.data = commit.interaction.data[!duplicated(commit.interaction.data), ] + verify.data.frame.columns(commit.interaction.data, COMMIT.INTERACTION.LIST.COLUMNS, COMMIT.INTERACTION.LIST.DATA.TYPES) return(commit.interaction.data) } From 6f73cff75c142239afa43ecc5294bd90067fbf7d Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 13:57:34 +0100 Subject: [PATCH 16/38] Change test to reflect change to dataframe columns Test now checks for 'base.author' and 'interacting.author' as intended Signed-off-by: Leo Sendelbach --- tests/test-read.R | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/test-read.R b/tests/test-read.R index 2cd0df6b..bb242e0b 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -506,19 +506,30 @@ test_that("Read the commit-interactions data.", { ## read the actual data commit.interactions.data.read = read.commit.interactions(proj.conf$get.value("datapath")) ## build the expected data.frame - commit.interactions.data.expected = data.frame(func = c("test.c", "test2.c", "test2.c", "test2.c"), - commit.hash = c("5a5ec9675e98187e1e92561e1888aa6f04faa338", - "0a1a5c523d835459c42f33e863623138555e2526", - "418d1dc4929ad1df251d2aeb833dd45757b04a6f", - "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), - file = c("test.c", "test2.c", "test2.c", "test2.c"), - base.hash = c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", - "3a0ed78458b3976243db6829f63eba3eead26774", - "0a1a5c523d835459c42f33e863623138555e2526", - "0a1a5c523d835459c42f33e863623138555e2526"), - 
base.func = c("test.c", "test2.c", "test2.c", "test2.c"), - base.file = c("test.c", "test2.c", "test2.c", "test2.c")) + commit.interactions.data.expected = data.frame(matrix(nrow = 4, ncol = 8)) + ## assure that the correct type is used + for(i in seq_len(8)) { + commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) + } + ## set everything except for authors as expected + colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", "base.hash", + "base.func", "base.file", "base.author", + "interacting.author") + commit.interactions.data.expected[["commit.hash"]] = + c("5a5ec9675e98187e1e92561e1888aa6f04faa338", + "0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6") + commit.interactions.data.expected[["base.hash"]] = + c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "0a1a5c523d835459c42f33e863623138555e2526") + commit.interactions.data.expected[["func"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["file"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["base.file"]] = c("test.c", "test2.c", "test2.c", "test2.c") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") @@ -533,10 +544,11 @@ test_that("Read the empty commit-interactions data.", { commit.interactions.data.read = read.commit.interactions("./codeface-data/results/testing/ test_empty_proximity/proximity") ## build the expected data.frame - commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 6)) + commit.interactions.data.expected = data.frame(matrix(nrow = 0, ncol = 8)) 
colnames(commit.interactions.data.expected) = c("func", "commit.hash", "file", - "base.hash", "base.func", "base.file") - for(i in seq_len(6)) { + "base.hash", "base.func", "base.file", + "base.author", "interacting.author") + for(i in seq_len(8)) { commit.interactions.data.expected[[i]] = as.character(commit.interactions.data.expected[[i]]) } ## check the results From fd0aa05f824b93545ae8e05833b95b3bd9809286 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 14:32:33 +0100 Subject: [PATCH 17/38] Add 'cleanup.commit.interactions' function Function removes lines from commit-interactions that do not contain an author in either 'base.author' or 'interacting.author' Signed-off-by: Leo Sendelbach --- util-data.R | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/util-data.R b/util-data.R index 5903575c..71458d4e 100644 --- a/util-data.R +++ b/util-data.R @@ -1288,6 +1288,24 @@ ProjectData = R6::R6Class("ProjectData", private$commit.interactions = data }, + #' Remove lines in the commit-interactions data that do not contain authors. + #' This should only be called AFTER 'update.commit.interactions' has already been called, as otherwise + #' all commit-interactions data will be removed + cleanup.commit.interactions = function() { + logging::loginfo("Cleaning up commit-interactions") + + ## remove commit-interactions that do not contain author in 'base.author' + indices.to.remove = which(is.na(private$commit.interactions[["base.author"]])) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[-indices.to.remove, ] + } + ## remove commit-interactions that do not contain author in 'interacting.author' + indices.to.remove = which(is.na(private$commit.interactions[["interacting.author"]])) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[-indices.to.remove, ] + } + }, + #' Get the synchronicity data. 
If it is not already stored in the ProjectData, this function triggers a read in #' from disk. #' From ef725407bf8818c8fff96ea6f343338b7162cbe0 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 27 Feb 2024 14:35:03 +0100 Subject: [PATCH 18/38] Add test for cleanup function Test that a line is removed from a data frame with a missing author Signed-off-by: Leo Sendelbach --- tests/test-data.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test-data.R b/tests/test-data.R index 98116323..7ee1d0c9 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -543,6 +543,12 @@ test_that("Compare two ProjectData Objects with commit.interactions", { expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) + ## The data frame should still have 4 entries: + expect_true(nrow(proj.data.one$get.commit.interactions()) == 4) + ## after cleanup is called, the data frame should only have 3 entries: + proj.data.one$cleanup.commit.interactions() + expect_true(nrow(proj.data.one$get.commit.interactions()) == 3) + ## set commit list of one project data to empty and test that last ## two rows of result data frame are empty proj.data.two$set.commits(create.empty.commits.list()) From 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 13:30:03 +0100 Subject: [PATCH 19/38] Add test for author network Test easy construction of an author network with interaction as relation Signed-off-by: Leo Sendelbach --- tests/test-networks-author.R | 45 ++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index d29d74e0..32989490 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -23,6 +23,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2023-2024 by Maximilian Löffler ## Copyright 2024 by Leo Sendelbach + ## All Rights Reserved. 
@@ -677,3 +678,47 @@ test_that("Network construction with only untracked files (no edges expected)", ## test expect_true(igraph::identical_graphs(network.built, network.expected)) }) + +test_that("Network construction with commit-interactions as relation", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.value("author.relation", "interaction") + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.author.network() + + ## build the expected nbetwork + vertices = data.frame( + name = c("Olaf", "Thomas", "Björn", "Karl"), + kind = TYPE.AUTHOR, + type = TYPE.AUTHOR + ) + edges = data.frame( + from = c("Olaf", "Thomas", "Björn", "Thomas"), + to = c("Thomas", "Karl", "Olaf", "Thomas"), + func = c("test2.c", "test2.c", "test.c", "test2.c"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("test2.c", "test2.c", "test.c", "test2.c"), + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test2.c", "test2.c", "test.c", "test2.c"), + base.file = c("test2.c", "test2.c", "test.c", "test2.c"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("interaction", "interaction", "interaction", "interaction") + ) + network = igraph::graph.data.frame(edges, directed = 
FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}) \ No newline at end of file From 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 13:57:00 +0100 Subject: [PATCH 20/38] Change 'util-networks.R' to use colnames Also in 'get.artifact.network.commit.interaction' distinguish between 'file' and 'function' artifact networks Signed-off-by: Leo Sendelbach --- util-networks.R | 67 +++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/util-networks.R b/util-networks.R index f7f59b88..77d9dce5 100644 --- a/util-networks.R +++ b/util-networks.R @@ -134,10 +134,11 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.vertex.kind.for.relation = function(relation) { vertex.kind = switch(relation, - cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), - callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), - mail = "MailThread", - issue = "Issue" + cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), + callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), + mail = "MailThread", + issue = "Issue", + interaction = "Interaction" ) return(vertex.kind) @@ -232,17 +233,18 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @return the commit-interaction author network get.author.network.commit.interaction = function() { ## get the authors that appear in the commit-interaction data as the vertices of the network - vertices = unique(c(private$proj.data$get.commit.interactions()$base.author, - private$proj.data$get.commit.interactions()$interacting.author)) + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.author"]], + private$proj.data$get.commit.interactions()[["interacting.author"]])) vertices = data.frame(name = vertices) ## get the commit-interaction data as the edge data of the network edges = 
private$proj.data$get.commit.interactions() - ## set the authors as the 'to' and 'from' of the network - colnames(edges)[7] = "to" - colnames(edges)[8] = "from" - edges = edges[,c(7,8,1,2,3,4,5,6)] - colnames(edges)[3] = "hash" + ## set the authors as the 'to' and 'from' of the network and order the dataframe + edges = edges[, c("base.author", "interacting.author", "func", "commit.hash", + "file", "base.hash", "base.func", "base.file")] + colnames(edges)[1] = "to" + colnames(edges)[2] = "from" + colnames(edges)[4] = "hash" author.net.data = list(vertices = vertices, edges = edges) ## construct the network author.net = construct.network.from.edge.list( @@ -380,26 +382,43 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' @return the commit-interaction based artifact network get.artifact.network.commit.interaction = function() { ## get the commits that appear in the commit-interaction data as the vertices of the network - vertices = unique(c(private$proj.data$get.commit.interactions()$base.file, private$proj.data$get.commit.interactions()$file)) + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], + private$proj.data$get.commit.interactions()[["file"]])) vertices = data.frame(name = vertices) ## get the commit-interaction data as the edge data of the network edges = private$proj.data$get.commit.interactions() - ## set the commits as the 'to' and 'from' of the network - colnames(edges)[6] = "to" - colnames(edges)[4] = "from" - edges = edges[,c(6,4,1,2,3,5)] - colnames(edges)[3] = "hash" - author.net.data = list(vertices = vertices, edges = edges) + ## set 'to' and 'from' of the network according to the config + ## and order the dataframe accordingly + proj.conf = private$proj.data$get.project.conf() + if (proj.conf$get.value("artifact") == "file") { + edges = edges[, c("file", "base.file", "func", "commit.hash", + "base.hash", "base.func", "base.author", "interacting.author")] + colnames(edges)[4] = "hash" + } else { + if 
(proj.conf$get.value("artifact") == "function") { + edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", + "base.file", "base.author", "interacting.author")] + colnames(edges)[3] = "hash" + } else { + ## if neither 'function' nor 'file' was configured, send a warning + ## and return an empty network + logging::logwarn("when creating a commit-interaction artifact network, + the artifact relation should be either 'file' or 'function'!") + return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) + } + } + colnames(edges)[1] = "to" + colnames(edges)[2] = "from" + artifact.net.data = list(vertices = vertices, edges = edges) ## construct the network - author.net = construct.network.from.edge.list( - author.net.data[["vertices"]], - author.net.data[["edges"]], + artifact.net = construct.network.from.edge.list( + artifact.net.data[["vertices"]], + artifact.net.data[["edges"]], network.conf = private$network.conf, - directed = private$network.conf$get.value("author.directed"), + directed = private$network.conf$get.value("artifact.directed"), available.edge.attributes = list(hash = "character") ) - - return(author.net) + return(artifact.net) }, #' Get the call-graph-based artifact network. 
From 07e7ed744209b0251217fa8f7f35d9b9875face2 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 15:33:45 +0100 Subject: [PATCH 21/38] Add tests for artifact networks One simple test for each artifact network configuration (either 'file' or 'function') Signed-off-by: Leo Sendelbach --- tests/test-networks-artifact.R | 86 ++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 253e08ba..2ad9a643 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -212,3 +212,89 @@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o "directed: FALSE" = list(test.directed = FALSE), "directed: TRUE" = list(test.directed = TRUE) )) + +test_that("Network construction with commit-interactions as relation, artifact type 'file'", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.value("artifact.relation", "interaction") + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.artifact.network() + ## build the expected nbetwork + vertices = data.frame( + name = c("test2.c", "test.c"), + kind = "Interaction", + type = TYPE.ARTIFACT + ) + edges = data.frame( + from = c("test2.c", "test2.c", "test.c", "test2.c"), + to = c("test2.c", "test2.c", "test.c", "test2.c"), + func = c("test2.c", "test2.c", "test.c", "test2.c"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + 
"d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.func = c("test2.c", "test2.c", "test.c", "test2.c"), + base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("interaction", "interaction", "interaction", "interaction") + ) + network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}) + +test_that("Network construction with commit-interactions as relation, artifact type 'function'", { + ## configuration object for the datapath + proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "function") + proj.conf$update.value("commit.interactions", TRUE) + proj.conf$update.value("commits.filter.untracked.files", FALSE) + proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.data = ProjectData$new(project.conf = proj.conf) + + net.conf = NetworkConf$new() + net.conf$update.value("artifact.relation", "interaction") + + network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) + network.built = network.builder$get.artifact.network() + ## build the expected nbetwork + vertices = data.frame( + name = c("test2.c", "test.c"), + kind = "Interaction", + type = TYPE.ARTIFACT + ) + edges = data.frame( + from = c("test2.c", "test2.c", "test.c", "test2.c"), + to = c("test2.c", "test2.c", "test.c", "test2.c"), + hash = c("0a1a5c523d835459c42f33e863623138555e2526", + "418d1dc4929ad1df251d2aeb833dd45757b04a6f", + "5a5ec9675e98187e1e92561e1888aa6f04faa338", + "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), + file = c("test2.c", "test2.c", "test.c", "test2.c"), + 
base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", + "0a1a5c523d835459c42f33e863623138555e2526", + "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "0a1a5c523d835459c42f33e863623138555e2526"), + base.file = c("test2.c", "test2.c", "test.c", "test2.c"), + base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), + weight = c(1, 1, 1, 1), + type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), + relation = c("interaction", "interaction", "interaction", "interaction") + ) + network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + + expect_true(igraph::identical_graphs(network.built, network)) +}) From dbd07e931f96c2b4234928d8ef94db042a7ac5cf Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 5 Mar 2024 15:43:36 +0100 Subject: [PATCH 22/38] Fix artifact network construction It now uses the correct vertices depending on the configuration of either 'file' or 'function'. Signed-off-by: Leo Sendelbach --- util-networks.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/util-networks.R b/util-networks.R index 77d9dce5..734573fd 100644 --- a/util-networks.R +++ b/util-networks.R @@ -381,21 +381,30 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", #' #' @return the commit-interaction based artifact network get.artifact.network.commit.interaction = function() { - ## get the commits that appear in the commit-interaction data as the vertices of the network - vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], - private$proj.data$get.commit.interactions()[["file"]])) - vertices = data.frame(name = vertices) + ## initialize the vertices. They will be set correctly depending on the used config. 
+ vertices = c() ## get the commit-interaction data as the edge data of the network edges = private$proj.data$get.commit.interactions() + ## set 'to' and 'from' of the network according to the config ## and order the dataframe accordingly proj.conf = private$proj.data$get.project.conf() if (proj.conf$get.value("artifact") == "file") { + ## change the vertices to the functions from the commit-interaction data + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], + private$proj.data$get.commit.interactions()[["file"]])) + vertices = data.frame(name = vertices) + edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] colnames(edges)[4] = "hash" } else { if (proj.conf$get.value("artifact") == "function") { + ## change the vertices to the functions from the commit-interaction data + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], + private$proj.data$get.commit.interactions()[["func"]])) + vertices = data.frame(name = vertices) + edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] colnames(edges)[3] = "hash" From 169dbfe516933244af28b20fdfd33a77d42f522b Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 8 Mar 2024 12:06:21 +0100 Subject: [PATCH 23/38] Change tests for artifact networks Tests now expect the correct vertex kind Signed-off-by: Leo Sendelbach --- tests/test-networks-artifact.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 2ad9a643..1e07476c 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -229,7 +229,7 @@ test_that("Network construction with commit-interactions as relation, artifact t ## build the expected nbetwork vertices = data.frame( name = c("test2.c", "test.c"), - kind = "Interaction", + kind = "File", type = 
TYPE.ARTIFACT ) edges = data.frame( @@ -272,7 +272,7 @@ test_that("Network construction with commit-interactions as relation, artifact t ## build the expected nbetwork vertices = data.frame( name = c("test2.c", "test.c"), - kind = "Interaction", + kind = "Function", type = TYPE.ARTIFACT ) edges = data.frame( From 8736025b29058a33c94c968086ab55a2270e07d9 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 8 Mar 2024 12:10:47 +0100 Subject: [PATCH 24/38] Change vertex kind for artifact networks vertices now have the correct 'kind' attribute also restructured if statements in artifact network construction Signed-off-by: Leo Sendelbach --- util-networks.R | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/util-networks.R b/util-networks.R index 734573fd..852fc27e 100644 --- a/util-networks.R +++ b/util-networks.R @@ -138,7 +138,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), mail = "MailThread", issue = "Issue", - interaction = "Interaction" + interaction = private$proj.data$get.project.conf.entry("artifact.codeface") ) return(vertex.kind) @@ -388,8 +388,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", ## set 'to' and 'from' of the network according to the config ## and order the dataframe accordingly - proj.conf = private$proj.data$get.project.conf() - if (proj.conf$get.value("artifact") == "file") { + proj.conf.artifact = private$proj.data$get.project.conf.entry("artifact") + if (proj.conf.artifact == "file") { ## change the vertices to the functions from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], private$proj.data$get.commit.interactions()[["file"]])) @@ -398,23 +398,21 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] colnames(edges)[4] = 
"hash" + } else if (proj.conf.artifact == "function") { + ## change the vertices to the functions from the commit-interaction data + vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], + private$proj.data$get.commit.interactions()[["func"]])) + vertices = data.frame(name = vertices) + + edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", + "base.file", "base.author", "interacting.author")] + colnames(edges)[3] = "hash" } else { - if (proj.conf$get.value("artifact") == "function") { - ## change the vertices to the functions from the commit-interaction data - vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], - private$proj.data$get.commit.interactions()[["func"]])) - vertices = data.frame(name = vertices) - - edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", - "base.file", "base.author", "interacting.author")] - colnames(edges)[3] = "hash" - } else { - ## if neither 'function' nor 'file' was configured, send a warning - ## and return an empty network - logging::logwarn("when creating a commit-interaction artifact network, - the artifact relation should be either 'file' or 'function'!") - return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) - } + ## if neither 'function' nor 'file' was configured, send a warning + ## and return an empty network + logging::logwarn("when creating a commit-interaction artifact network, + the artifact relation should be either 'file' or 'function'!") + return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) } colnames(edges)[1] = "to" colnames(edges)[2] = "from" From a924e86268f7109b048fcfc032a7fa9ba58f3e5f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 12 Mar 2024 15:49:56 +0100 Subject: [PATCH 25/38] Add commits to 'NEWS.md' Same points as before with more references to commits Signed-off-by: Leo Sendelbach --- NEWS.md | 12 ++++++++++++ 1 file 
changed, 12 insertions(+) diff --git a/NEWS.md b/NEWS.md index e58c8611..446761a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,18 @@ # coronet – Changelog +## unversioned + +### Added + +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, 7792a4e9a087c042a3ef9b7f79a67490305ce85e, 178265dcc69abc0d6e430dfcbc4b87e7565ce615, 80e6ac5f24e6d0248e77be391f93a59b6b17862d, 1ffa607bbe400bd212388dc543263ba5bec4e34c) +- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `interaction` (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, deddd4ce9d2a570ea57088ea73d4312f81e73049, 0e269af77bc098f2d3157fac349d2032efd6cf49, d96b10b45ec55cdf2dd02c60833d4116358d6d31) +- Add tests for new commit-interaction functionality (PR #252, 3e5b8962e18c3dde45085fa764c9d084327e2773, 7685ec4745bd43fba7a373bf5544f41bff346ed9, b291cb338e1b3896c8fd9769f45c515bddb8cf48, eea1b053350094084bab957975e1b306e6c9dc23, 3d4a521e47dc81aaae8ae01ff78ca8d514bb7d85, 05ea1ce1c3330f3fb8fb28ccbc08b85fbd4ec2c8, 99103f27ad0c8ee1bd62cdcee10778a98020db70, fd6064a83a7735020ad5250d092e266af5bbada0) + +### Changed/Improved + +### Fixed + ## 4.4 ### Announcement From 48d9de1ee5c28dc6360ed04615decb8179ded49e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 13 Mar 2024 14:51:14 +0100 Subject: [PATCH 26/38] Change warning to use 'logging::logwarn' Warning is now uniform with other warnings in project Signed-off-by: Leo Sendelbach --- util-networks-misc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util-networks-misc.R b/util-networks-misc.R index a183f603..c9abd08a 100644 --- a/util-networks-misc.R +++ b/util-networks-misc.R @@ -151,7 +151,7 @@ 
get.expanded.adjacency = function(network, authors, weighted = FALSE) { # write a warning with the number of authors from the network that we ignore warning.string = sprintf("The network had %d authors that will not be displayed in the matrix!", network.authors.num - nrow(matrix.data)) - warning(warning.string) + logging::logwarn(warning.string) } ## save the activity data per author From 91b9c3bf65b33d82f1be2a28d2c078295993aea6 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 19 Mar 2024 14:17:54 +0100 Subject: [PATCH 27/38] Fix issues pointed out on PR comments Also made small changes to testing data to include a commit with specified function and added a warning that should only occur when the commit-interaction data was generated incorrectly Signed-off-by: Leo Sendelbach --- .../proximity/commit-interactions.yaml | 22 ++++++-------- tests/test-data.R | 11 +++---- tests/test-networks-artifact.R | 30 +++++++++---------- tests/test-networks-author.R | 16 +++++----- tests/test-read.R | 10 +++---- util-data.R | 5 +++- util-networks.R | 4 +-- util-read.R | 6 +++- 8 files changed, 54 insertions(+), 50 deletions(-) diff --git a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml index e424236d..83445eeb 100644 --- a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml +++ b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml @@ -1,18 +1,17 @@ scope: REGION result-map: - test.c: - demangled-name: test.c - file: test.c + test_function: + demangled-name: test_function + file: test3.c num-instructions: 30 insts: - base-hash: region: 45620620587549 - function: test.c - commit: 72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0 + function: test_function + commit: 1143db502761379c2bfcecc2007fc34282e7ee61 repository: test-repo interacting-hashes: - region: 87546092348456 - function: test.c 
commit: 5a5ec9675e98187e1e92561e1888aa6f04faa338 repository: test-repo amount: 2 @@ -21,34 +20,31 @@ result-map: commits: - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af repository: test-repo - test2.c: - demangled-name: test2.c + test2: + demangled-name: test2 file: test2.c num-instructions: 26 insts: - base-hash: region: 50956672345141 - function: test2.c commit: 3a0ed78458b3976243db6829f63eba3eead26774 repository: test-repo interacting-hashes: - region: 98750276234511 - function: test2.c commit: 0a1a5c523d835459c42f33e863623138555e2526 repository: test-repo amount: 1 - base-hash: region: 67230588834344 - function: test2.c commit: 0a1a5c523d835459c42f33e863623138555e2526 repository: test-repo interacting-hashes: - region: 33295067820043 - function: test2.c + function: test2 commit: 418d1dc4929ad1df251d2aeb833dd45757b04a6f repository: test-repo - region: 20194653678423 - function: test2.c + function: test2 commit: d01921773fae4bed8186b0aa411d6a2f7a6626e6 repository: test-repo amount: 3 diff --git a/tests/test-data.R b/tests/test-data.R index 7ee1d0c9..9049e3ce 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -571,11 +571,12 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.interactions.data.expected[["base.hash"]] = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("test2.c", "test2.c", "test.c", "test2.c") - commit.interactions.data.expected[["file"]] = c("test2.c", "test2.c", "test.c", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test2.c", "test2.c", "test.c", "test2.c") - commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test.c", "test2.c") + commit.interactions.data.expected[["func"]] = c("GLOBAL", "test2", "GLOBAL", "test2") + 
commit.interactions.data.expected[["file"]] = c("GLOBAL", "test2.c", "GLOBAL", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test2", "test2", "test_function", "test2") + commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") + expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) }) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 1e07476c..13fad5f1 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -228,24 +228,24 @@ test_that("Network construction with commit-interactions as relation, artifact t network.built = network.builder$get.artifact.network() ## build the expected nbetwork vertices = data.frame( - name = c("test2.c", "test.c"), + name = c("test2.c", "test3.c", "GLOBAL"), kind = "File", type = TYPE.ARTIFACT ) edges = data.frame( - from = c("test2.c", "test2.c", "test.c", "test2.c"), - to = c("test2.c", "test2.c", "test.c", "test2.c"), - func = c("test2.c", "test2.c", "test.c", "test2.c"), + from = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), + to = c("test2.c", "test2.c", "test3.c", "test2.c"), + func = c("GLOBAL", "test2", "GLOBAL", "test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2.c", "test2.c", "test.c", "test2.c"), - base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + base.func = c("test2", "test2", "test_function", "test2"), + base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = 
c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), @@ -269,26 +269,26 @@ test_that("Network construction with commit-interactions as relation, artifact t network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() - ## build the expected nbetwork + ## build the expected network vertices = data.frame( - name = c("test2.c", "test.c"), + name = c("test2", "test_function", "GLOBAL"), kind = "Function", type = TYPE.ARTIFACT ) edges = data.frame( - from = c("test2.c", "test2.c", "test.c", "test2.c"), - to = c("test2.c", "test2.c", "test.c", "test2.c"), + from = c("GLOBAL", "test2", "GLOBAL", "test2"), + to = c("test2", "test2", "test_function", "test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), - file = c("test2.c", "test2.c", "test.c", "test2.c"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.file = c("test2.c", "test2.c", "test.c", "test2.c"), - base.author = c("Olaf", "Thomas", "Björn", "Thomas"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), + base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 32989490..cabb598e 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -693,27 +693,27 @@ test_that("Network construction with commit-interactions as relation", { network.builder = 
NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.author.network() - ## build the expected nbetwork + ## build the expected network vertices = data.frame( - name = c("Olaf", "Thomas", "Björn", "Karl"), + name = c("Olaf", "Thomas", "Karl"), kind = TYPE.AUTHOR, type = TYPE.AUTHOR ) edges = data.frame( - from = c("Olaf", "Thomas", "Björn", "Thomas"), + from = c("Olaf", "Thomas", "Karl", "Thomas"), to = c("Thomas", "Karl", "Olaf", "Thomas"), - func = c("test2.c", "test2.c", "test.c", "test2.c"), + func = c("GLOBAL", "test2", "GLOBAL", "test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", "d01921773fae4bed8186b0aa411d6a2f7a6626e6"), - file = c("test2.c", "test2.c", "test.c", "test2.c"), + file = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), base.hash = c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", - "72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2.c", "test2.c", "test.c", "test2.c"), - base.file = c("test2.c", "test2.c", "test.c", "test2.c"), + base.func = c("test2", "test2", "test_function", "test2"), + base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("interaction", "interaction", "interaction", "interaction") diff --git a/tests/test-read.R b/tests/test-read.R index bb242e0b..bafafe12 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -522,14 +522,14 @@ test_that("Read the commit-interactions data.", { "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "d01921773fae4bed8186b0aa411d6a2f7a6626e6") commit.interactions.data.expected[["base.hash"]] = - c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", + c("1143db502761379c2bfcecc2007fc34282e7ee61", 
"3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("test.c", "test2.c", "test2.c", "test2.c") - commit.interactions.data.expected[["file"]] = c("test.c", "test2.c", "test2.c", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test.c", "test2.c", "test2.c", "test2.c") - commit.interactions.data.expected[["base.file"]] = c("test.c", "test2.c", "test2.c", "test2.c") + commit.interactions.data.expected[["func"]] = c("GLOBAL", "GLOBAL", "test2", "test2") + commit.interactions.data.expected[["file"]] = c("GLOBAL", "GLOBAL", "test2.c", "test2.c") + commit.interactions.data.expected[["base.func"]] = c("test_function", "test2", "test2", "test2") + commit.interactions.data.expected[["base.file"]] = c("test3.c", "test2.c", "test2.c", "test2.c") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, info = "commit interaction data.") diff --git a/util-data.R b/util-data.R index 71458d4e..b13a3373 100644 --- a/util-data.R +++ b/util-data.R @@ -1260,7 +1260,7 @@ ProjectData = R6::R6Class("ProjectData", ## if the commit-interaction data have not yet been read do this if (!self$is.data.source.cached("commit.interactions")) { - if(is.null(data.path)) { + if (is.null(data.path)) { commit.interaction.data = read.commit.interactions(self$get.data.path()) } else { commit.interaction.data = read.commit.interactions(data.path) @@ -1282,6 +1282,9 @@ ProjectData = R6::R6Class("ProjectData", if (is.null(data)) { data = create.empty.commit.interaction.list() + } else { + ## verify the format of the given dataframe + verify.data.frame.columns(data, COMMIT.INTERACTION.LIST.COLUMNS, COMMIT.INTERACTION.LIST.DATA.TYPES) } ## set the actual data diff --git a/util-networks.R b/util-networks.R index 852fc27e..d1a5eb62 100644 --- a/util-networks.R +++ b/util-networks.R @@ -390,7 +390,7 @@ NetworkBuilder = 
R6::R6Class("NetworkBuilder", ## and order the dataframe accordingly proj.conf.artifact = private$proj.data$get.project.conf.entry("artifact") if (proj.conf.artifact == "file") { - ## change the vertices to the functions from the commit-interaction data + ## change the vertices to the files from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.file"]], private$proj.data$get.commit.interactions()[["file"]])) vertices = data.frame(name = vertices) @@ -408,7 +408,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", "base.file", "base.author", "interacting.author")] colnames(edges)[3] = "hash" } else { - ## if neither 'function' nor 'file' was configured, send a warning + ## If neither 'function' nor 'file' was configured, send a warning ## and return an empty network logging::logwarn("when creating a commit-interaction artifact network, the artifact relation should be either 'file' or 'function'!") diff --git a/util-read.R b/util-read.R index 720a14ce..dc469bd2 100644 --- a/util-read.R +++ b/util-read.R @@ -903,13 +903,17 @@ read.commit.interactions = function(data.path = NULL) { ## get all commits that interact with the current one insts = current.interaction[[4]] interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { - base.hash = current.inst[[1]][[3]] + base.hash = current.inst[[1]]$`commit` interacting.hashes = current.inst[[2]] interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { ## if there is no function name in the current interaction we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name if (!"function" %in% names(hash)) { return(data.frame(func = "GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) + } else if (is.null(file.name.map$get(hash[["function"]]))) { + ## This case should never occur if the data was generated correctly! 
+ warning("An interacting hash specifies a function that does not exist in the data!") + return(data.frame(matrix(nrow = 3, ncol = 0))) } else { return(data.frame(func = hash[["function"]], commit.hash = hash[["commit"]], file = file.name.map$get(hash[["function"]]))) From 8d4965afcea4da8e820d52a095571d05b9c7c704 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 20 Mar 2024 14:41:10 +0100 Subject: [PATCH 28/38] Change call to 'read_yaml' Now uses a custom handler for type 'int' that converts the int to a string, which lets us read the 'region' value for the commits Signed-off-by: Leo Sendelbach --- .../testing/test_proximity/proximity/commit-interactions.yaml | 2 +- util-read.R | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml index 83445eeb..8e8b0186 100644 --- a/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml +++ b/tests/codeface-data/results/testing/test_proximity/proximity/commit-interactions.yaml @@ -52,4 +52,4 @@ result-map: - test_callee commits: - commit: 3383d8e5561dfc6fb2b65e0a194df94ccb5e08af - repository: test-repo \ No newline at end of file + repository: test-repo diff --git a/util-read.R b/util-read.R index dc469bd2..016da771 100644 --- a/util-read.R +++ b/util-read.R @@ -874,7 +874,9 @@ read.commit.interactions = function(data.path = NULL) { file = file.path(data.path, "commit-interactions.yaml") - commit.interaction.base = try(yaml::read_yaml(file = file), silent = TRUE) + commit.interaction.base = try(yaml::read_yaml(file = file, + handlers = list(int = function(x) {as.character(x)})), + silent = TRUE) ## handle the case that the list of commit-interactions is empty if (inherits(commit.interaction.base, "try-error")) { From 1addce944a637084b7983f34bf1fadc523174b33 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach 
Date: Thu, 4 Apr 2024 13:16:28 +0200 Subject: [PATCH 29/38] Change to address comments by @bockthom Fix style issues, modify README.md, add small test and add some comments for clarity Signed-off-by: Leo Sendelbach --- README.md | 10 +++ install.R | 4 +- tests/README.md | 1 + tests/test-data.R | 8 +++ tests/test-networks-artifact.R | 8 +-- tests/test-networks-author.R | 4 +- util-conf.R | 4 +- util-data.R | 119 ++++++++++++++++++--------------- util-networks.R | 14 ++-- util-read.R | 29 +++++--- 10 files changed, 121 insertions(+), 80 deletions(-) diff --git a/README.md b/README.md index 62c029b3..0595555d 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,8 @@ Alternatively, you can run `Rscript install.R` to install the packages. - `jsonlite`: For parsing the issue data - `rTensor`: For calculating EDCPTD centrality - `Matrix`: For sparse matrix representation of large adjacency matrices +- `fastmap`: For fast implementation of a map +- `purrr`: For fast implementtion of a mapping function ### Submodule @@ -264,6 +266,11 @@ Relations determine which information is used to construct edges among the verti * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), source-code artifacts are connected when they reference each other (i.e., one artifact calls a function contained in the other artifact). * For bipartite networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), authors get linked to all source-code artifacts they have changed in their respective commits (same as for the relation `cochange`). +- `commit.interaction` + * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. + * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is a interacting between two commits that occur in the artifacts.
+ * This relation does not apply for bipartite networks. + #### Edge-construction algorithms for author networks When constructing author networks, we use events in time (i.e., commits, e-mails, issue events) to model interactions among authors on the same artifact as edges. Therefore, we group the events on artifacts, based on the configured relation (see the [previous section](#relations)). @@ -597,6 +604,9 @@ There is no way to update the entries, except for the revision-based parameters. - `custom.event.timestamps.locked`: * Lock custom event timestamps to prevent them from being read if empty or not yet present when calling the getter. * [`TRUE`, *`FALSE`*] +- `commit.interactions`: + * Alloow construction of author and artifact networks using commit interaction data + * [`TRUE`, *`FALSE`*] ### NetworkConf diff --git a/install.R b/install.R index 99f047cc..94d403d9 100644 --- a/install.R +++ b/install.R @@ -44,7 +44,9 @@ packages = c( "viridis", "jsonlite", "rTensor", - "Matrix" + "Matrix", + "fastmap", + "purrr" ) diff --git a/tests/README.md b/tests/README.md index 6eb55791..cfe453fb 100644 --- a/tests/README.md +++ b/tests/README.md @@ -16,6 +16,7 @@ We have two test projects you can use when writing your tests: * Commit messages * Pasta * Synchronicity + * Commit Interactions * Custom event timestamps in `custom-events.list` * Revisions 2. 
- Casestudy: `test_empty` diff --git a/tests/test-data.R b/tests/test-data.R index 9049e3ce..98456846 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -99,6 +99,13 @@ test_that("Compare two ProjectData objects on empty data", { proj.data.two$set.project.conf.entry("commit.messages", "message") proj.data.two$get.commit.messages() expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects (commit.messages).") + + proj.data.one$set.project.conf.entry("commit.interactions", TRUE) + proj.data.one$get.commit.interactions() + expect_false(proj.data.one$equals(proj.data.two), "Two non-identical ProjectData objects (commit.interactions).") + proj.data.two$set.project.conf.entry("commit.interactions", TRUE) + proj.data.two$get.commit.interactions() + expect_true(proj.data.one$equals(proj.data.two), "Two identical ProjectData objects (commit.interactions).") }) test_that("Compare two ProjectData objects on non-empty data", { @@ -540,6 +547,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.data[["hash"]][[5]] = 1 proj.data.one$set.commits(commit.data) + ## use isTRUE to compress result of all.equal into a single boolean expect_false(isTRUE(all.equal(proj.data.one$get.commit.interactions(), proj.data.two$get.commit.interactions()))) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 13fad5f1..e52dd973 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -222,7 +222,7 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("artifact.relation", "interaction") + net.conf$update.value("artifact.relation", "commit.interaction") network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() @@ -249,7 +249,7 @@ test_that("Network construction 
with commit-interactions as relation, artifact t interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), - relation = c("interaction", "interaction", "interaction", "interaction") + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) @@ -265,7 +265,7 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("artifact.relation", "interaction") + net.conf$update.value("artifact.relation", "commit.interaction") network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() @@ -292,7 +292,7 @@ test_that("Network construction with commit-interactions as relation, artifact t interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), - relation = c("interaction", "interaction", "interaction", "interaction") + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index cabb598e..4f580ef2 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -688,7 +688,7 @@ test_that("Network construction with commit-interactions as relation", { proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("author.relation", "interaction") + net.conf$update.value("author.relation", "commit.interaction") network.builder = NetworkBuilder$new(project.data = proj.data, 
network.conf = net.conf) network.built = network.builder$get.author.network() @@ -716,7 +716,7 @@ test_that("Network construction with commit-interactions as relation", { base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), - relation = c("interaction", "interaction", "interaction", "interaction") + relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) diff --git a/util-conf.R b/util-conf.R index 434fbf96..64a9ed79 100644 --- a/util-conf.R +++ b/util-conf.R @@ -790,7 +790,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, author.relation = list( default = "mail", type = "character", - allowed = c("mail", "cochange", "issue", "interaction"), + allowed = c("mail", "cochange", "issue", "commit.interaction"), allowed.number = Inf ), author.directed = list( @@ -821,7 +821,7 @@ NetworkConf = R6::R6Class("NetworkConf", inherit = Conf, artifact.relation = list( default = "cochange", type = "character", - allowed = c("cochange", "callgraph", "mail", "issue", "interaction"), + allowed = c("cochange", "callgraph", "mail", "issue", "commit.interaction"), allowed.number = Inf ), artifact.directed = list( diff --git a/util-data.R b/util-data.R index b13a3373..ab4a4988 100644 --- a/util-data.R +++ b/util-data.R @@ -78,8 +78,8 @@ DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION = list( "synchronicity" = "get.synchronicity", "pasta" = "get.pasta", "gender" = "get.gender", - "custom.event.timestamps" = "get.custom.event.timestamps", - "commit.interactions" = "get.commit.interactions" + "commit.interactions" = "get.commit.interactions", + "custom.event.timestamps" = "get.custom.event.timestamps" ) #' Applies a function to list keys @@ -125,7 +125,8 @@ CONF.PARAMETERS.NO.RESET.ENVIRONMENT = c("commit.messages", "issues.locked", 
"mails.locked", "custom.event.timestamps", - "custom.event.timestamps.locked") + "custom.event.timestamps.locked", + "commit.interactions") ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / @@ -164,7 +165,7 @@ ProjectData = R6::R6Class("ProjectData", commits = create.empty.commits.list(), # data.frame commits.unfiltered = create.empty.commits.list(), # data.frame commit.messages = create.empty.commit.message.list(), # data.frame - commit.interactions = create.empty.commit.interaction.list(), + commit.interactions = create.empty.commit.interaction.list(), # data.frame ## mails mails.unfiltered = create.empty.mails.list(), # data.frame mails = create.empty.mails.list(), # data.frame @@ -414,46 +415,49 @@ ProjectData = R6::R6Class("ProjectData", #' #' This method should be called whenever the field \code{commit.interactions} is changed. update.commit.interactions = function() { - if (!self$is.data.source.cached("commits.unfiltered")) { - self$get.commits() - } + if (self$is.data.source.cached("commit.interactions")) { + if (!self$is.data.source.cached("commits.unfiltered")) { + self$get.commits() + } - ## remove existing columns named 'base.author' and 'interaction.author' - indices.to.remove = which("base.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove) > 0) { - private$commit.interactions = private$commit.interactions[, -indices.to.remove] - } - indices.to.remove = which("interacting.author" == colnames(private$commit.interactions)) - if (length(indices.to.remove) > 0) { - private$commit.interactions = private$commit.interactions[, -indices.to.remove] - } + ## remove existing columns named 'base.author' and 'interaction.author' + indices.to.remove = which("base.author" == colnames(private$commit.interactions)) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } + indices.to.remove = which("interacting.author" == 
colnames(private$commit.interactions)) + if (length(indices.to.remove) > 0) { + private$commit.interactions = private$commit.interactions[, -indices.to.remove] + } - ## get relevant data from commits - commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], - author.name = private$commits.unfiltered[["author.name"]]) - commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] + ## get relevant data from commits + commit.data.subset = data.frame(hash = private$commits.unfiltered[["hash"]], + author.name = private$commits.unfiltered[["author.name"]]) + commit.data.subset = commit.data.subset[!duplicated(commit.data.subset[["hash"]]),] - ## merge commit interactions with commits and change colnames to avoid duplicates - commit.interaction.data = merge(private$commit.interactions, commit.data.subset, - by.x = "base.hash", by.y = "hash", all.x = TRUE) + ## merge commit interactions with commits and change colnames to avoid duplicates + commit.interaction.data = merge(private$commit.interactions, commit.data.subset, + by.x = "base.hash", by.y = "hash", all.x = TRUE) - author.index = match("author.name", colnames(commit.interaction.data)) - colnames(commit.interaction.data)[[author.index]] = "base.author" + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "base.author" - commit.interaction.data = merge(commit.interaction.data, commit.data.subset, - by.x = "commit.hash", by.y = "hash", all.x = TRUE) + commit.interaction.data = merge(commit.interaction.data, commit.data.subset, + by.x = "commit.hash", by.y = "hash", all.x = TRUE) - author.index = match("author.name", colnames(commit.interaction.data)) - colnames(commit.interaction.data)[[author.index]] = "interacting.author" + author.index = match("author.name", colnames(commit.interaction.data)) + colnames(commit.interaction.data)[[author.index]] = "interacting.author" - ## warning if we have 
interactions without authors - if (anyNA(commit.interaction.data[["base.author"]]) || - anyNA(commit.interaction.data[["interacting.author"]])) { - logging::logwarn("There are authors in the commit-interactions that are not in the commit data! - This results in the commit-interactions having empty entries. - To clean up these entries, call cleanup.commit.interactions.") + ## warning if we have interactions without authors + if (anyNA(commit.interaction.data[["base.author"]]) || + anyNA(commit.interaction.data[["interacting.author"]])) { + logging::logwarn("There are commits in the commit-interactions that are not in + the commit data, possibly due to incomplete commit data or deleted users. + This results in the commit-interactions having empty entries. + To clean up these entries, call cleanup.commit.interactions.") + } + private$commit.interactions = commit.interaction.data } - private$commit.interactions = commit.interaction.data }, ## * * Gender data -------------------------------------------------- @@ -858,6 +862,7 @@ ProjectData = R6::R6Class("ProjectData", private$pasta.commits = create.empty.pasta.list() private$gender = create.empty.gender.list() private$synchronicity = create.empty.synchronicity.list() + private$commit.interactions = create.empty.commit.interaction.list() }, ## * * configuration ----------------------------------------------- @@ -1258,19 +1263,26 @@ ProjectData = R6::R6Class("ProjectData", get.commit.interactions = function(data.path = NULL) { logging::loginfo("Getting commit interactions.") - ## if the commit-interaction data have not yet been read do this - if (!self$is.data.source.cached("commit.interactions")) { - if (is.null(data.path)) { - commit.interaction.data = read.commit.interactions(self$get.data.path()) - } else { - commit.interaction.data = read.commit.interactions(data.path) - } + ## if commit-interaction data are to be read, do this + if (private$project.conf$get.value("commit.interactions")) { + ## if the 
commit-interaction data have not yet been read do this + if (!self$is.data.source.cached("commit.interactions")) { + if (is.null(data.path)) { + commit.interaction.data = read.commit.interactions(self$get.data.path()) + } else { + commit.interaction.data = read.commit.interactions(data.path) + } - ## cache the result - private$commit.interactions = commit.interaction.data - private$update.commit.interactions() + ## cache the result + private$commit.interactions = commit.interaction.data + private$update.commit.interactions() + } + } else { + logging::logwarn("You have not set the ProjectConf parameter + 'commit.interactions' to 'TRUE'! Ignoring...") + ## mark commit-interaction data as empty + private$commit.interactions = NULL } - return(private$commit.interactions) }, @@ -1291,9 +1303,10 @@ ProjectData = R6::R6Class("ProjectData", private$commit.interactions = data }, - #' Remove lines in the commit-interactions data that do not contain authors. - #' This should only be called AFTER 'update.commit.interactions' has already been called, as otherwise - #' all commit-interactions data will be removed + #' Remove lines in the commit-interaction data for which the corresponding commit is missing in the + #' commit data, indicated by a missing author in the commit-interaction data. + #' This should only be called AFTER \code{update.commit.interactions} has already been called, as otherwise + #' all commit-interactions data will be removed. 
cleanup.commit.interactions = function() { logging::loginfo("Cleaning up commit-interactions") @@ -1879,8 +1892,8 @@ ProjectData = R6::R6Class("ProjectData", "commit.messages" = "commit.messages", "synchronicity" = "synchronicity", "pasta" = "pasta", - "custom.event.timestamps" = "custom.event.timestamps", - "commit.interactions" = "commit.interactions" + "commit.interactions" = "commit.interactions", + "custom.event.timestamps" = "custom.event.timestamps" ) ) sources = self$get.cached.data.sources.internal(source.type) @@ -1912,7 +1925,7 @@ ProjectData = R6::R6Class("ProjectData", ## define the data sources unfiltered.data.sources = c("commits.unfiltered", "mails.unfiltered", "issues.unfiltered") additional.data.sources = c("authors", "commit.messages", "synchronicity", "pasta", - "gender", "custom.event.timestamps", "commit.interactions") + "gender", "commit.interactions", "custom.event.timestamps") main.data.sources = c("issues", "commits", "mails") ## set the right data sources to look for according to the argument diff --git a/util-networks.R b/util-networks.R index d1a5eb62..d957257c 100644 --- a/util-networks.R +++ b/util-networks.R @@ -138,7 +138,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), mail = "MailThread", issue = "Issue", - interaction = private$proj.data$get.project.conf.entry("artifact.codeface") + commit.interaction = private$proj.data$get.project.conf.entry("artifact.codeface") ) return(vertex.kind) @@ -377,7 +377,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", return(artifacts.net) }, - #' Build and get the the commit-interaction based artifact network. + #' Build and get the commit-interaction based artifact network. 
#' #' @return the commit-interaction based artifact network get.artifact.network.commit.interaction = function() { @@ -397,7 +397,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - colnames(edges)[4] = "hash" + colnames(edges)[colnames(edges)=="commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], @@ -406,12 +406,12 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - colnames(edges)[3] = "hash" + colnames(edges)[colnames(edges)=="commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning ## and return an empty network logging::logwarn("when creating a commit-interaction artifact network, - the artifact relation should be either 'file' or 'function'!") + the artifact should be either 'file' or 'function'!") return(create.empty.network(directed = private$network.conf$get.value("artifact.directed"))) } colnames(edges)[1] = "to" @@ -826,7 +826,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", network = switch( relation, cochange = private$get.author.network.cochange(), - interaction = private$get.author.network.commit.interaction(), + commit.interaction = private$get.author.network.commit.interaction(), mail = private$get.author.network.mail(), issue = private$get.author.network.issue(), stop(sprintf("The author relation '%s' does not exist.", rel)) @@ -894,7 +894,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", callgraph = private$get.artifact.network.callgraph(), mail = private$get.artifact.network.mail(), issue = private$get.artifact.network.issue(), - interaction = 
private$get.artifact.network.commit.interaction(), + commit.interaction = private$get.artifact.network.commit.interaction(), stop(sprintf("The artifact relation '%s' does not exist.", relation)) ) diff --git a/util-read.R b/util-read.R index 016da771..5443d36f 100644 --- a/util-read.R +++ b/util-read.R @@ -44,6 +44,8 @@ requireNamespace("digest") # for sha1 hashing of IDs requireNamespace("sqldf") # for SQL-selections on data.frames requireNamespace("data.table") # for faster data.frame processing requireNamespace("yaml") # for reading commit interaction data +requireNamespace("fastmap") # for fast implementation of a map +requireNamespace("purrr") # for fast mapping function ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Helper functions -------------------------------------------------------- @@ -866,7 +868,6 @@ COMMIT.INTERACTION.LIST.DATA.TYPES = c( #' commit (hash) gets mapped to all commits it interacts with and the file/function because of #' which they interact. 
#' -#' #' @param data.path the path to the commit-interaction data #' #' @return the read and parsed commit-interaction data @@ -888,27 +889,31 @@ read.commit.interactions = function(data.path = NULL) { } ## extract the top level list of the yaml file which is called 'result-map' - result.map = commit.interaction.base$`result-map` + result.map = commit.interaction.base[["result-map"]] ## extract a mapping of functions to files to be able to determine what file the current interaction is ## based on + ## 1) create an empty map file.name.map = fastmap::fastmap() + ## 2) create a mapping between functions and files as a list function.file.list = purrr::map(result.map, 2) + ## 3) set the map using the list file.name.map$mset(.list = function.file.list) list.names = names(result.map) ## build the result dataframe by iterating over the 'result-map' list - commit.interaction.data = data.table::setDF(data.table::rbindlist(parallel::mcmapply(result.map, list.names, - SIMPLIFY = FALSE, - FUN = function(current.interaction, - function.name) { + commit.interaction.data = data.table::setDF(data.table::rbindlist( + parallel::mcmapply(result.map, + list.names, + SIMPLIFY = FALSE, + FUN = function(current.interaction, function.name) { ## get all commits that interact with the current one insts = current.interaction[[4]] interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { - base.hash = current.inst[[1]]$`commit` + base.hash = current.inst[[1]][["commit"]] interacting.hashes = current.inst[[2]] interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { - ## if there is no function name in the current interaction we set the function name to 'GLOBAL' + ## if there is no function name in the current interaction, we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name if (!"function" %in% names(hash)) { return(data.frame(func = 
"GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) @@ -921,11 +926,13 @@ read.commit.interactions = function(data.path = NULL) { file = file.name.map$get(hash[["function"]]))) } }))) - interacting.hashes.df$base.hash = base.hash - interacting.hashes.df$base.func = function.name - interacting.hashes.df$base.file = file.name.map$get(function.name) + interacting.hashes.df[["base.hash"]] = base.hash + interacting.hashes.df[["base.func"]] = function.name + interacting.hashes.df[["base.file"]] = file.name.map$get(function.name) return(interacting.hashes.df) }))) + ## Initialize author data as 'NA', since it is not available from the commit-interaction data. + ## Author data will be merged from commit data in \code{update.commit.interactions}. interactions["base.author"] = NA_character_ interactions["interacting.author"] = NA_character_ return(interactions) From 13359651629fbe4ab8c5eb20583fde4e420e12eb Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 5 Apr 2024 14:03:45 +0200 Subject: [PATCH 30/38] Add global variable and change function names Also add more available edge attributes to network construction Signed-off-by: Leo Sendelbach --- util-data.R | 8 +++++++- util-networks.R | 6 ++++-- util-read.R | 19 +++++++++++++------ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/util-data.R b/util-data.R index ab4a4988..0443b183 100644 --- a/util-data.R +++ b/util-data.R @@ -1962,7 +1962,8 @@ ProjectData = R6::R6Class("ProjectData", #' \code{"commits"}, and \code{"issues"}. 
[default: "commits"] #' #' @return a named list of data classes, with the corresponding data columns as names - get.data.columns.for.data.source = function(data.source = c("commits", "mails", "issues")) { + get.data.columns.for.data.source = function(data.source = c("commits", "mails", + "issues", "commit.interactions")) { ## check arguments data.source = match.arg(arg = data.source, several.ok = FALSE) @@ -1970,6 +1971,11 @@ ProjectData = R6::R6Class("ProjectData", ## get the needed data method first data.fun = DATASOURCE.TO.ARTIFACT.FUNCTION[[data.source]] + ## if 'data.fun' is NULL, check 'DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION' + if (is.null(data.fun)) { + data.fun = DATASOURCE.TO.ADDITIONAL.ARTIFACT.FUNCTION[[data.source]] + } + ## get the column classes with corresponding names columns = lapply(self[[data.fun]](), class) diff --git a/util-networks.R b/util-networks.R index d957257c..bfe7a998 100644 --- a/util-networks.R +++ b/util-networks.R @@ -252,7 +252,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", author.net.data[["edges"]], network.conf = private$network.conf, directed = private$network.conf$get.value("author.directed"), - available.edge.attributes = list(hash = "character") + available.edge.attributes = private$proj.data$ + get.data.columns.for.data.source("commit.interactions") ) return(author.net) }, @@ -423,7 +424,8 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", artifact.net.data[["edges"]], network.conf = private$network.conf, directed = private$network.conf$get.value("artifact.directed"), - available.edge.attributes = list(hash = "character") + available.edge.attributes = private$proj.data$ + get.data.columns.for.data.source("commit.interactions") ) return(artifact.net) }, diff --git a/util-read.R b/util-read.R index 5443d36f..fa0bbff1 100644 --- a/util-read.R +++ b/util-read.R @@ -863,6 +863,8 @@ COMMIT.INTERACTION.LIST.DATA.TYPES = c( "character", "character" ) +COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME = "GLOBAL" + #' Read and 
parse the commit-interaction data. This data is present in a `.yaml` file which #' needs to be broken down. Within the yaml file, there are different lists in which each #' commit (hash) gets mapped to all commits it interacts with and the file/function because of @@ -895,7 +897,8 @@ read.commit.interactions = function(data.path = NULL) { ## based on ## 1) create an empty map file.name.map = fastmap::fastmap() - ## 2) create a mapping between functions and files as a list + ## 2) create a mapping between functions and files as a named list + ## which can be directly converted to a map function.file.list = purrr::map(result.map, 2) ## 3) set the map using the list file.name.map$mset(.list = function.file.list) @@ -916,19 +919,23 @@ read.commit.interactions = function(data.path = NULL) { ## if there is no function name in the current interaction, we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name if (!"function" %in% names(hash)) { - return(data.frame(func = "GLOBAL", commit.hash = hash[["commit"]], file = "GLOBAL")) + return(data.frame(func = COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME, + commit.hash = hash[["commit"]], + file = COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME)) } else if (is.null(file.name.map$get(hash[["function"]]))) { ## This case should never occur if the data was generated correctly! 
warning("An interacting hash specifies a function that does not exist in the data!") return(data.frame(matrix(nrow = 3, ncol = 0))) } else { - return(data.frame(func = hash[["function"]], commit.hash = hash[["commit"]], - file = file.name.map$get(hash[["function"]]))) + file.name = file.name.map$get(hash[["function"]]) + func.name = paste(file.name, hash[("function")], sep = "::") + return(data.frame(func = func.name, commit.hash = hash[["commit"]], file = file.name)) } }))) + base.file.name = file.name.map$get(function.name) interacting.hashes.df[["base.hash"]] = base.hash - interacting.hashes.df[["base.func"]] = function.name - interacting.hashes.df[["base.file"]] = file.name.map$get(function.name) + interacting.hashes.df[["base.func"]] = paste(base.file.name, function.name, sep = "::") + interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) ## Initialize author data as 'NA', since it is not available from the commit-interaction data. From 8ce1f0705ee2d04ed72a8e564e1896db21163b52 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Fri, 5 Apr 2024 14:07:32 +0200 Subject: [PATCH 31/38] Change tests to match new function names Also use patrick to test for directedness Signed-off-by: Leo Sendelbach --- tests/test-data.R | 5 +++-- tests/test-networks-artifact.R | 36 ++++++++++++++++++++++------------ tests/test-networks-author.R | 17 ++++++++++------ tests/test-read.R | 5 +++-- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/tests/test-data.R b/tests/test-data.R index 98456846..e4bea8c4 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -581,9 +581,10 @@ test_that("Compare two ProjectData Objects with commit.interactions", { "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("GLOBAL", "test2", "GLOBAL", "test2") + commit.interactions.data.expected[["func"]] = c("GLOBAL", 
"test2.c::test2", "GLOBAL", "test2.c::test2") commit.interactions.data.expected[["file"]] = c("GLOBAL", "test2.c", "GLOBAL", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test2", "test2", "test_function", "test2") + commit.interactions.data.expected[["base.func"]] = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2") commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index e52dd973..122c96ee 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -213,7 +213,7 @@ patrick::with_parameters_test_that("Network construction of an empty 'comments-o "directed: TRUE" = list(test.directed = TRUE) )) -test_that("Network construction with commit-interactions as relation, artifact type 'file'", { +patrick::with_parameters_test_that("Network construction with commit-interactions as relation, artifact type 'file'", { ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") proj.conf$update.value("commit.interactions", TRUE) @@ -222,7 +222,8 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("artifact.relation", "commit.interaction") + net.conf$update.values(updated.values = list(artifact.relation = "commit.interaction", + artifact.directed = test.directed)) network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() @@ -235,7 +236,7 @@ test_that("Network construction with commit-interactions as relation, artifact t edges = data.frame( from = c("GLOBAL", "test2.c", "GLOBAL", "test2.c"), to = c("test2.c", 
"test2.c", "test3.c", "test2.c"), - func = c("GLOBAL", "test2", "GLOBAL", "test2"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", @@ -244,19 +245,23 @@ test_that("Network construction with commit-interactions as relation, artifact t "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2", "test2", "test_function", "test2"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), base.author = c("Olaf", "Thomas", "Karl", "Thomas"), interacting.author = c("Thomas", "Karl", "Olaf", "Thomas"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) -}) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) -test_that("Network construction with commit-interactions as relation, artifact type 'function'", { +patrick::with_parameters_test_that("Network construction with commit-interactions as relation, artifact type 'function'", { ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "function") proj.conf$update.value("commit.interactions", TRUE) @@ -265,19 +270,21 @@ test_that("Network construction with commit-interactions as relation, artifact t proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - 
net.conf$update.value("artifact.relation", "commit.interaction") + net.conf$update.values(updated.values = list(artifact.relation = "commit.interaction", + artifact.directed = test.directed)) network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.artifact.network() ## build the expected network vertices = data.frame( - name = c("test2", "test_function", "GLOBAL"), + name = c("test2.c::test2", "test3.c::test_function", "GLOBAL"), kind = "Function", type = TYPE.ARTIFACT ) edges = data.frame( - from = c("GLOBAL", "test2", "GLOBAL", "test2"), - to = c("test2", "test2", "test_function", "test2"), + from = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), + to = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", @@ -294,7 +301,10 @@ test_that("Network construction with commit-interactions as relation, artifact t type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed = FALSE, vertices = vertices) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) -}) +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 4f580ef2..9ffa3472 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -679,7 +679,7 @@ test_that("Network construction with only untracked files (no edges expected)", expect_true(igraph::identical_graphs(network.built, network.expected)) }) -test_that("Network 
construction with commit-interactions as relation", { +patrick::with_parameters_test_that("Network construction with commit-interactions as relation", { ## configuration object for the datapath proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, "file") proj.conf$update.value("commit.interactions", TRUE) @@ -688,7 +688,8 @@ test_that("Network construction with commit-interactions as relation", { proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() - net.conf$update.value("author.relation", "commit.interaction") + net.conf$update.values(updated.values = list(author.relation = "commit.interaction", + author.directed = test.directed)) network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf) network.built = network.builder$get.author.network() @@ -702,7 +703,7 @@ test_that("Network construction with commit-interactions as relation", { edges = data.frame( from = c("Olaf", "Thomas", "Karl", "Thomas"), to = c("Thomas", "Karl", "Olaf", "Thomas"), - func = c("GLOBAL", "test2", "GLOBAL", "test2"), + func = c("GLOBAL", "test2.c::test2", "GLOBAL", "test2.c::test2"), hash = c("0a1a5c523d835459c42f33e863623138555e2526", "418d1dc4929ad1df251d2aeb833dd45757b04a6f", "5a5ec9675e98187e1e92561e1888aa6f04faa338", @@ -712,13 +713,17 @@ test_that("Network construction with commit-interactions as relation", { "0a1a5c523d835459c42f33e863623138555e2526", "1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"), - base.func = c("test2", "test2", "test_function", "test2"), + base.func = c("test2.c::test2", "test2.c::test2", + "test3.c::test_function", "test2.c::test2"), base.file = c("test2.c", "test2.c", "test3.c", "test2.c"), weight = c(1, 1, 1, 1), type = c(TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA, TYPE.EDGES.INTRA), relation = c("commit.interaction", "commit.interaction", "commit.interaction", "commit.interaction") ) - network = igraph::graph.data.frame(edges, directed 
= FALSE, vertices = vertices) + network = igraph::graph.data.frame(edges, directed = test.directed, vertices = vertices) expect_true(igraph::identical_graphs(network.built, network)) -}) \ No newline at end of file +}, patrick::cases( + "directed: FALSE" = list(test.directed = FALSE), + "directed: TRUE" = list(test.directed = TRUE) +)) \ No newline at end of file diff --git a/tests/test-read.R b/tests/test-read.R index bafafe12..58c9bd3c 100644 --- a/tests/test-read.R +++ b/tests/test-read.R @@ -526,9 +526,10 @@ test_that("Read the commit-interactions data.", { "3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", "0a1a5c523d835459c42f33e863623138555e2526") - commit.interactions.data.expected[["func"]] = c("GLOBAL", "GLOBAL", "test2", "test2") + commit.interactions.data.expected[["func"]] = c("GLOBAL", "GLOBAL", "test2.c::test2", "test2.c::test2") commit.interactions.data.expected[["file"]] = c("GLOBAL", "GLOBAL", "test2.c", "test2.c") - commit.interactions.data.expected[["base.func"]] = c("test_function", "test2", "test2", "test2") + commit.interactions.data.expected[["base.func"]] = c("test3.c::test_function", "test2.c::test2", + "test2.c::test2", "test2.c::test2") commit.interactions.data.expected[["base.file"]] = c("test3.c", "test2.c", "test2.c", "test2.c") ## check the results expect_identical(commit.interactions.data.read, commit.interactions.data.expected, From 7c92b723056e1851d88791bf8c10ece44a474f27 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 10 Apr 2024 12:38:16 +0200 Subject: [PATCH 32/38] Fix typos and change data frame access Read method now exclusively uses names to access data frame Signed-off-by: Leo Sendelbach --- README.md | 4 ++-- tests/README.md | 2 +- util-networks.R | 12 ++++++------ util-read.R | 10 +++++----- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 0595555d..3fb9d44f 100644 --- a/README.md +++ b/README.md @@ -268,7 +268,7 @@ Relations 
determine which information is used to construct edges among the verti - `commit.interaction` * For author networks (configured via `author.relation` in the [`NetworkConf`](#networkconf)), authors who contribute to interacting commits are connected with an edge. - * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is a interacting between two commits that occur in the artifacts. + * For artifact networks (configured via `artifact.relation` in the [`NetworkConf`](#networkconf)), artifacts are connected when there is an interaction between two commits that occur in the artifacts. * This relation does not apply for bipartite networks. #### Edge-construction algorithms for author networks @@ -605,7 +605,7 @@ There is no way to update the entries, except for the revision-based parameters. * Lock custom event timestamps to prevent them from being read if empty or not yet present when calling the getter. * [`TRUE`, *`FALSE`*] - `commit.interactions`: - * Alloow construction of author and artifact networks using commit interaction data + * Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] ### NetworkConf diff --git a/tests/README.md b/tests/README.md index cfe453fb..b6558dc1 100644 --- a/tests/README.md +++ b/tests/README.md @@ -16,7 +16,7 @@ We have two test projects you can use when writing your tests: * Commit messages * Pasta * Synchronicity - * Commit Interactions + * Commit interactions * Custom event timestamps in `custom-events.list` * Revisions 2. 
- Casestudy: `test_empty` diff --git a/util-networks.R b/util-networks.R index bfe7a998..aa9511b2 100644 --- a/util-networks.R +++ b/util-networks.R @@ -134,10 +134,10 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", get.vertex.kind.for.relation = function(relation) { vertex.kind = switch(relation, - cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), - callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), - mail = "MailThread", - issue = "Issue", + cochange = private$proj.data$get.project.conf.entry("artifact.codeface"), + callgraph = private$proj.data$get.project.conf.entry("artifact.codeface"), + mail = "MailThread", + issue = "Issue", commit.interaction = private$proj.data$get.project.conf.entry("artifact.codeface") ) @@ -398,7 +398,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("file", "base.file", "func", "commit.hash", "base.hash", "base.func", "base.author", "interacting.author")] - colnames(edges)[colnames(edges)=="commit.hash"] = "hash" + colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else if (proj.conf.artifact == "function") { ## change the vertices to the functions from the commit-interaction data vertices = unique(c(private$proj.data$get.commit.interactions()[["base.func"]], @@ -407,7 +407,7 @@ NetworkBuilder = R6::R6Class("NetworkBuilder", edges = edges[, c("func", "base.func", "commit.hash", "file", "base.hash", "base.file", "base.author", "interacting.author")] - colnames(edges)[colnames(edges)=="commit.hash"] = "hash" + colnames(edges)[colnames(edges) == "commit.hash"] = "hash" } else { ## If neither 'function' nor 'file' was configured, send a warning ## and return an empty network diff --git a/util-read.R b/util-read.R index fa0bbff1..892c59cd 100644 --- a/util-read.R +++ b/util-read.R @@ -898,8 +898,8 @@ read.commit.interactions = function(data.path = NULL) { ## 1) create an empty map file.name.map = fastmap::fastmap() ## 2) create a mapping between functions and 
files as a named list - ## which can be directly converted to a map - function.file.list = purrr::map(result.map, 2) + ## which can be directly converted to a map + function.file.list = purrr::map(result.map, "file") ## 3) set the map using the list file.name.map$mset(.list = function.file.list) list.names = names(result.map) @@ -911,10 +911,10 @@ read.commit.interactions = function(data.path = NULL) { SIMPLIFY = FALSE, FUN = function(current.interaction, function.name) { ## get all commits that interact with the current one - insts = current.interaction[[4]] + insts = current.interaction[["insts"]] interactions = data.table::setDF(data.table::rbindlist(lapply(insts, function(current.inst) { - base.hash = current.inst[[1]][["commit"]] - interacting.hashes = current.inst[[2]] + base.hash = current.inst[["base-hash"]][["commit"]] + interacting.hashes = current.inst[["interacting-hashes"]] interacting.hashes.df = data.table::setDF(data.table::rbindlist(lapply(interacting.hashes, function(hash) { ## if there is no function name in the current interaction, we set the function name to 'GLOBAL' ## as this is most likely code outside of functions, else we set the function name From bc4938675a4b66f17de332eb2b04b66591d54003 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Wed, 10 Apr 2024 13:10:19 +0200 Subject: [PATCH 33/38] Change NEWS.md with new commit hashes after rebase Also remove two points from it as per @bockthom's suggestions Signed-off-by: Leo Sendelbach --- NEWS.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 446761a5..2b28ff65 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,9 +6,8 @@ ### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, 7792a4e9a087c042a3ef9b7f79a67490305ce85e,
178265dcc69abc0d6e430dfcbc4b87e7565ce615, 80e6ac5f24e6d0248e77be391f93a59b6b17862d, 1ffa607bbe400bd212388dc543263ba5bec4e34c) -- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `interaction` (PR #252, 5da0e60e029bdf427520be440fedb0f71d9f7a15, deddd4ce9d2a570ea57088ea73d4312f81e73049, 0e269af77bc098f2d3157fac349d2032efd6cf49, d96b10b45ec55cdf2dd02c60833d4116358d6d31) -- Add tests for new commit-interaction functionality (PR #252, 3e5b8962e18c3dde45085fa764c9d084327e2773, 7685ec4745bd43fba7a373bf5544f41bff346ed9, b291cb338e1b3896c8fd9769f45c515bddb8cf48, eea1b053350094084bab957975e1b306e6c9dc23, 3d4a521e47dc81aaae8ae01ff78ca8d514bb7d85, 05ea1ce1c3330f3fb8fb28ccbc08b85fbd4ec2c8, 99103f27ad0c8ee1bd62cdcee10778a98020db70, fd6064a83a7735020ad5250d092e266af5bbada0) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) ### Changed/Improved 
From bca35760eb0aac86c04923f2d534b2d8cece204e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 18 Apr 2024 13:29:50 +0200 Subject: [PATCH 34/38] Add Configuration for filtering commit interactions Filtering happens in 'get.commit.interactions' if 'filter.commit.interactions' is TRUE, as it is per default. Signed-off-by: Leo Sendelbach --- README.md | 3 +++ tests/test-data.R | 5 +++++ tests/test-networks-artifact.R | 2 ++ tests/test-networks-author.R | 1 + util-conf.R | 6 ++++++ util-data.R | 5 +++++ 6 files changed, 22 insertions(+) diff --git a/README.md b/README.md index 3fb9d44f..1c6e78fc 100644 --- a/README.md +++ b/README.md @@ -607,6 +607,9 @@ There is no way to update the entries, except for the revision-based parameters. - `commit.interactions`: * Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] +- `filter.commit.interactions`: + * Filter out entries from commit interaction data that are not matched to a specific function or file + * [*`TRUE`*, `FALSE`] ### NetworkConf diff --git a/tests/test-data.R b/tests/test-data.R index e4bea8c4..893661fb 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -526,6 +526,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", { proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data.one = ProjectData$new(project.conf = proj.conf) proj.data.two = proj.data.one$clone(deep = TRUE) @@ -588,4 +589,8 @@ test_that("Compare two ProjectData Objects with commit.interactions", { commit.interactions.data.expected[["base.file"]] = c("test2.c", "test2.c", "test3.c", "test2.c") expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) + + ## reactivate filtering of commit interactions + 
proj.data.two$set.project.conf.entry("filter.commit.interactions", TRUE) + expect_true(nrow(proj.data.two$get.commit.interactions()) == 2) }) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 122c96ee..67d1ec6c 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -219,6 +219,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() @@ -267,6 +268,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 9ffa3472..04d2c392 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -685,6 +685,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) + proj.conf$update.value("filter.commit.interactions", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/util-conf.R b/util-conf.R index 64a9ed79..12bdca19 100644 --- a/util-conf.R +++ b/util-conf.R @@ -474,6 +474,12 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 
1 ), + filter.commit.interactions = list( + default = TRUE, + type = "logical", + allowed = c(TRUE, FALSE), + allowed.number = 1 + ), custom.event.timestamps.file = list( default = NA, type = "character", diff --git a/util-data.R b/util-data.R index 0443b183..673a19d8 100644 --- a/util-data.R +++ b/util-data.R @@ -1273,6 +1273,11 @@ ProjectData = R6::R6Class("ProjectData", commit.interaction.data = read.commit.interactions(data.path) } + ## filter commit interactions if configured + if (private$project.conf$get.value("filter.commit.interactions")) { + commit.interaction.data = subset(commit.interaction.data, + file != COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME) + } ## cache the result private$commit.interactions = commit.interaction.data private$update.commit.interactions() From f8ea987b138173cf0509c7910e0572d8ee1b3f1f Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 18 Apr 2024 14:46:45 +0200 Subject: [PATCH 35/38] Add helper function for prefixing function names Helper function 'prefix.function.with.file.names' in 'util-read.R' Signed-off-by: Leo Sendelbach --- util-read.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/util-read.R b/util-read.R index 892c59cd..f5261a3f 100644 --- a/util-read.R +++ b/util-read.R @@ -70,6 +70,16 @@ remove.deleted.and.empty.user = function(data, columns = c("author.name")) { return(data) } +#' Concatenation of function and file names, e.g. 
'file::function' +#' +#' @param file.name the name of the file +#' @param function.name the name of the function +#' +#' @return the concatenated function name +prefix.function.with.file.names = function(file.name, function.name) { + return(paste(file.name, function.name, sep = "::")) +} + ## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / ## Main data sources ------------------------------------------------------- @@ -168,7 +178,7 @@ read.commits = function(data.path, artifact) { ## (we have proximity-based data as foundation) if (artifact == "function") { ## artifact = file name + "::" . function name - artifacts.new = paste(commit.data[["file"]], commit.data[["artifact"]], sep = "::") + artifacts.new = prefix.function.with.file.names(commit.data[["file"]], commit.data[["artifact"]]) ## clean up empty artifacts and File_Level artifact artifacts.new = gsub("^::$", "", artifacts.new) @@ -928,13 +938,13 @@ read.commit.interactions = function(data.path = NULL) { return(data.frame(matrix(nrow = 3, ncol = 0))) } else { file.name = file.name.map$get(hash[["function"]]) - func.name = paste(file.name, hash[("function")], sep = "::") + func.name = prefix.function.with.file.names(file.name, hash[("function")]) return(data.frame(func = func.name, commit.hash = hash[["commit"]], file = file.name)) } }))) base.file.name = file.name.map$get(function.name) interacting.hashes.df[["base.hash"]] = base.hash - interacting.hashes.df[["base.func"]] = paste(base.file.name, function.name, sep = "::") + interacting.hashes.df[["base.func"]] = prefix.function.with.file.names(base.file.name, function.name) interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) From 7d8be96d6a55eebd96c8e8ee609dd52c637cda9e Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Thu, 18 Apr 2024 14:51:18 +0200 Subject: [PATCH 36/38] Change 'NEWS.md' to include new commits Now also contains an entry for new helper method Signed-off-by: Leo Sendelbach --- 
NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 2b28ff65..7df8b15f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,8 +6,9 @@ ### Added -- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) +- Add commit-interaction data and add functions `read.commit.interactions` for reading, as well as `get.commit.interactions`, `set.commit.interactions` and utility functions for working with commit-interaction data (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, b4fd2a29c9b5fd561b1106c6febb54a32b0085ab, fd0aa05f824b93545ae8e05833b95b3bd9809286, bca35760eb0aac86c04923f2d534b2d8cece204e) as well as tests for these features (PR #252, eeba7e29932bc973513c963fb9e716e9230d570f, 8bb39f4df39b49dfaff8f19feb6db5e5fbd81fac, 54b6f655248720436af116fe72521f9cb0348429, 7a5497aaf9114017d1b3b9b68b6cccd7ca8ac114, 7b8585f87675795822c07230192d6454de31dcc7, ef725407bf8818c8fff96ea6f343338b7162cbe0) - Add commit-interaction networks that can be created with `create.author.network` and `create.artifact.network` if the `artifact.relation` and `author.relation` is configured to be `commit.interaction` (PR #252, d82857fbebd1111bb16588a4223bb24a8dcd07de, 329d97ec3de36a9e1bcadc0c7a53c1d92e8b481c) as well as tests for these features (PR #252, 07e7ed744209b0251217fa8f7f35d9b9875face2, 7068cfa10d993dcae3f5e3f76f8cafa99fa8b350) +- Add helper function for prefixing function 
names with file names in `util-read.R` (PR #252, f8ea987b138173cf0509c7910e0572d8ee1b3f1f) ### Changed/Improved From b8857cf64006fffce262f3d109471ae2c2003e7b Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 23 Apr 2024 12:01:04 +0200 Subject: [PATCH 37/38] Change some comments and variable names Helper function is now called `prefix.function.with.file.name` and config parameter is called `commit.interactions.filter.global`. Signed-off-by: Leo Sendelbach --- README.md | 2 +- tests/test-data.R | 4 ++-- tests/test-networks-artifact.R | 4 ++-- tests/test-networks-author.R | 2 +- util-conf.R | 2 +- util-data.R | 2 +- util-read.R | 10 +++++----- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 1c6e78fc..e8bc0877 100644 --- a/README.md +++ b/README.md @@ -607,7 +607,7 @@ There is no way to update the entries, except for the revision-based parameters. - `commit.interactions`: * Allow construction of author and artifact networks using commit-interaction data * [`TRUE`, *`FALSE`*] -- `filter.commit.interactions`: +- `commit.interactions.filter.global`: * Filter out entries from commit interaction data that are not matched to a specific function or file * [*`TRUE`*, `FALSE`] diff --git a/tests/test-data.R b/tests/test-data.R index 893661fb..aa665ac4 100644 --- a/tests/test-data.R +++ b/tests/test-data.R @@ -526,7 +526,7 @@ test_that("Compare two ProjectData Objects with commit.interactions", { proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data.one = ProjectData$new(project.conf = proj.conf) proj.data.two = proj.data.one$clone(deep = TRUE) @@ -591,6 +591,6 @@ test_that("Compare two ProjectData Objects with commit.interactions", { 
expect_equal(proj.data.two$get.commit.interactions(), commit.interactions.data.expected) ## reactivate filtering of commit interactions - proj.data.two$set.project.conf.entry("filter.commit.interactions", TRUE) + proj.data.two$set.project.conf.entry("commit.interactions.filter.global", TRUE) expect_true(nrow(proj.data.two$get.commit.interactions()) == 2) }) diff --git a/tests/test-networks-artifact.R b/tests/test-networks-artifact.R index 67d1ec6c..79251c60 100644 --- a/tests/test-networks-artifact.R +++ b/tests/test-networks-artifact.R @@ -219,7 +219,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() @@ -268,7 +268,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/tests/test-networks-author.R b/tests/test-networks-author.R index 04d2c392..8f9dd11b 100644 --- a/tests/test-networks-author.R +++ b/tests/test-networks-author.R @@ -685,7 +685,7 @@ patrick::with_parameters_test_that("Network construction with commit-interaction proj.conf$update.value("commit.interactions", TRUE) proj.conf$update.value("commits.filter.untracked.files", FALSE) proj.conf$update.value("commits.filter.base.artifact", FALSE) - 
proj.conf$update.value("filter.commit.interactions", FALSE) + proj.conf$update.value("commit.interactions.filter.global", FALSE) proj.data = ProjectData$new(project.conf = proj.conf) net.conf = NetworkConf$new() diff --git a/util-conf.R b/util-conf.R index 12bdca19..ff345c00 100644 --- a/util-conf.R +++ b/util-conf.R @@ -474,7 +474,7 @@ ProjectConf = R6::R6Class("ProjectConf", inherit = Conf, allowed = c(TRUE, FALSE), allowed.number = 1 ), - filter.commit.interactions = list( + commit.interactions.filter.global = list( default = TRUE, type = "logical", allowed = c(TRUE, FALSE), diff --git a/util-data.R b/util-data.R index 673a19d8..988146a5 100644 --- a/util-data.R +++ b/util-data.R @@ -1274,7 +1274,7 @@ ProjectData = R6::R6Class("ProjectData", } ## filter commit interactions if configured - if (private$project.conf$get.value("filter.commit.interactions")) { + if (private$project.conf$get.value("commit.interactions.filter.global")) { commit.interaction.data = subset(commit.interaction.data, file != COMMIT.INTERACTION.GLOBAL.FILE.FUNCTION.NAME) } diff --git a/util-read.R b/util-read.R index f5261a3f..f4fe7025 100644 --- a/util-read.R +++ b/util-read.R @@ -70,13 +70,13 @@ remove.deleted.and.empty.user = function(data, columns = c("author.name")) { return(data) } -#' Concatenation of function and file names, e.g. 'file::function' +#' Concatenate function and file name, i.e. 'file::function' #' #' @param file.name the name of the file #' @param function.name the name of the function #' #' @return the concatenated function name -prefix.function.with.file.names = function(file.name, function.name) { +prefix.function.with.file.name = function(file.name, function.name) { return(paste(file.name, function.name, sep = "::")) } @@ -178,7 +178,7 @@ read.commits = function(data.path, artifact) { ## (we have proximity-based data as foundation) if (artifact == "function") { ## artifact = file name + "::" . 
function name - artifacts.new = prefix.function.with.file.names(commit.data[["file"]], commit.data[["artifact"]]) + artifacts.new = prefix.function.with.file.name(commit.data[["file"]], commit.data[["artifact"]]) ## clean up empty artifacts and File_Level artifact artifacts.new = gsub("^::$", "", artifacts.new) @@ -938,13 +938,13 @@ read.commit.interactions = function(data.path = NULL) { return(data.frame(matrix(nrow = 3, ncol = 0))) } else { file.name = file.name.map$get(hash[["function"]]) - func.name = prefix.function.with.file.names(file.name, hash[("function")]) + func.name = prefix.function.with.file.name(file.name, hash[("function")]) return(data.frame(func = func.name, commit.hash = hash[["commit"]], file = file.name)) } }))) base.file.name = file.name.map$get(function.name) interacting.hashes.df[["base.hash"]] = base.hash - interacting.hashes.df[["base.func"]] = prefix.function.with.file.names(base.file.name, function.name) + interacting.hashes.df[["base.func"]] = prefix.function.with.file.name(base.file.name, function.name) interacting.hashes.df[["base.file"]] = base.file.name return(interacting.hashes.df) }))) From ee54b1afb5ef25743d675ef7650037a1f02efd29 Mon Sep 17 00:00:00 2001 From: Leo Sendelbach Date: Tue, 23 Apr 2024 12:44:52 +0200 Subject: [PATCH 38/38] Add missing copyright headers Copyright headers in `install.R` and `util-conf.R` Signed-off-by: Leo Sendelbach --- install.R | 1 + util-conf.R | 1 + 2 files changed, 2 insertions(+) diff --git a/install.R b/install.R index 94d403d9..5a8d5743 100644 --- a/install.R +++ b/install.R @@ -19,6 +19,7 @@ ## Copyright 2020-2023 by Thomas Bock ## Copyright 2019 by Anselm Fehnker ## Copyright 2021 by Christian Hechtl +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved.
## ## Adapted from https://github.com/siemens/codeface/blob/be382e9171fb91b4aa99b99b09b2ef64a6dba0d5/packages.r diff --git a/util-conf.R b/util-conf.R index ff345c00..9ae2fd73 100644 --- a/util-conf.R +++ b/util-conf.R @@ -26,6 +26,7 @@ ## Copyright 2021 by Johannes Hostert ## Copyright 2021 by Mirabdulla Yusifli ## Copyright 2022 by Jonathan Baumann +## Copyright 2024 by Leo Sendelbach ## All Rights Reserved.