Skip to content

Commit

Permalink
Merge pull request #255 from MaLoefUDS/dev
Browse files Browse the repository at this point in the history
Work on the simplification of multi-relation edges in networks

Reviewed-by: Thomas Bock <bockthom@cs.uni-saarland.de>
  • Loading branch information
bockthom authored Mar 22, 2024
2 parents 56ff0b3 + b017092 commit 1d3d1a3
Show file tree
Hide file tree
Showing 6 changed files with 260 additions and 12 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
##
## Copyright 2023 by Maximilian Löffler <s8maloef@cs.uni-saarland.de>
## Copyright 2023-2024 by Maximilian Löffler <s8maloef@cs.uni-saarland.de>
## Copyright 2024 by Thomas Bock <bockthom@cs.uni-saarland.de>
## All Rights Reserved.

Expand Down Expand Up @@ -41,7 +41,7 @@ jobs:

steps:
- name: Checkout Repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Update system
run: |
Expand Down
4 changes: 3 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
- Add a new `split.data.by.bins` function (not to be confused with a previously existing function that had the same name and was renamed in this context), which splits data based on given activity-based bins (PR #244, ece569ceaf557bb38cd0cfad437b69b30fe8a698, ed5feb214a123b605c9513262f187cfd72b9e1f4)
- Add new `assert.sparse.matrices.equal` function to compare two sparse matrices for equality for testing purposes (PR #248, 9784cdf12d1497ee122e2ae73b768b8c334210d4, d9f1a8d90e00a634d7caeb5e7f8f262776496838)
- Add tests for file `util-networks-misc.R` for issue #242 (PR #248, f3202a6f96723d11c170346556d036cf087521c8, 030574b9d0f3435db4032d0e195a3d407fb7244b, 380b02234275127297fcd508772c69db21c216de, 8b803c50d60fc593e4e527a08fd4c2068d801a48, 7335c3dd4d0302b024a66d18701d9800ed3fe806, 6b600df04bec1fe70c272604f274ec5309840e65)
- Add the possibility to simplify edges of multiple-relation networks into a single edge at all instead of a single edge per relation (PR #250, 2105ea89b5227e7c9fa78fea9de1977f2d9e8faa)
- Add the possibility to simplify the edges of multiple-relation networks into one single edge per vertex pair instead of a single edge per relation (PR #250, PR #255, 2105ea89b5227e7c9fa78fea9de1977f2d9e8faa, a34b5bd50351b9ccf3cc45fc323cfa2e84d65ea0, 34516415fed599eba0cc7d3cc4a9acd6b26db252, 78f43514962d7651e6b7a1e80ee22ce012f32535, d310fdc38690f0d701cd32c92112c33f7fdde0ff, 58d77b01ecc6a237104a4e72ee5fb9025efeaaf2)
- Add tests for network simplification (PR #255, 338b06941eec1c9cfdb121e78ce0d9db6b75da19, 8a6f47bc115c10fbbe4eee21985d97aee5c9dc91, e01908c94eccc4dda5f2b3c0746b0eab0172dc07, 7b6848fb86f69db088ce6ef2bea8315ac94d48f9)
- Add `get.bin.dates.from.ranges` function to convert date ranges into bins format (PR #249, a1842e9be46596321ee86860fd87d17a3c88f50f, 858b1812ebfc3194cc6a03c99f3ee7d161d1ca15)
- Add network simplification to showcase file (PR #255, dc32d44f9db7dfc3cc795ef5d6b86609d6c1936f)

### Changed/Improved

Expand Down
19 changes: 19 additions & 0 deletions showcase.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
## Copyright 2021 by Johannes Hostert <s8johost@stud.uni-saarland.de>
## Copyright 2021 by Niklas Schneider <s8nlschn@stud.uni-saarland.de>
## Copyright 2022 by Jonathan Baumann <joba00002@stud.uni-saarland.de>
## Copyright 2024 by Maximilian Löffler <s8maloef@stud.uni-saarland.de>
## All Rights Reserved.


Expand Down Expand Up @@ -361,6 +362,24 @@ g.motifs = motifs.count(network = g,
remove.duplicates = TRUE, raw.data = FALSE)


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Network simplification --------------------------------------------------

## construct sample network: start from the multi network of 'y', delete the edges
## with the IDs 5 and 6 and the vertices with the IDs 2, 4, 5, 6, 7, and 8, and then
## add two further "cochange" edges (weight 1 each) between "Björn" and "Olaf"
g = y$get.multi.network()
g = igraph::delete_edges(g, c(5, 6))
g = igraph::delete_vertices(g, c(2, 4, 5, 6, 7, 8))
g = g + igraph::edges(c("Björn", "Olaf", "Björn", "Olaf"), type = TYPE.EDGES.INTRA, weight = 1,
relation = "cochange", artifact.type = "Feature")

## merge edges between vertex pairs that stem from the same data source
## (i.e., 'simplify.multiple.relations' is not passed here)
g.simplified = simplify.network(g)
plot.network(g.simplified)

## merge all edges between vertex pairs, regardless of their relation
g.simplified = simplify.network(g, simplify.multiple.relations = TRUE)
plot.network(g.simplified)

## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Plots -------------------------------------------------------------------

Expand Down
214 changes: 214 additions & 0 deletions tests/test-networks.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
##
## Copyright 2018-2019 by Claus Hunsen <hunsen@fim.uni-passau.de>
## Copyright 2021 by Niklas Schneider <s8nlschn@stud.uni-saarland.de>
## Copyright 2024 by Maximilian Löffler <s8maloef@stud.uni-saarland.de>
## All Rights Reserved.


Expand Down Expand Up @@ -78,6 +79,198 @@ test_that("Simplify network with more than one relation", {

})

test_that("Simplify basic multi-relational network", {

    ##
    ## Simplify networks with vertices connected by multi-relational edges
    ##

    ## create artifact network in which the vertices "A" and "B" are connected by
    ## three "mail" edges and three "cochange" edges
    network =
        igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature")
    for (i in seq_len(3)) {
        network = igraph::add_edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail")
        network = igraph::add_edges(network, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange")
    }

    ## expected result of the relation-wise simplification: exactly one edge per relation
    network.expected = igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "cochange")

    ## simplify network without simplifying multiple relations into single edges
    network.simplified = simplify.network(network, simplify.multiple.relations = FALSE)
    assert.networks.equal(network.simplified, network.expected)

    ## simplify network with simplifying multiple relations into single edges
    network.simplified = simplify.network(network, simplify.multiple.relations = TRUE)

    ## check the simplified network itself (and not a re-simplified copy of it), as a
    ## second simplification step could hide a wrong number of edges
    expect_identical(igraph::ecount(network.simplified), 1)
    expect_identical(igraph::E(network.simplified)$type[[1]], "Unipartite")
    expect_identical(igraph::E(network.simplified)$relation[[1]], c("cochange", "mail"))
})

test_that("Simplify author-network with relation = c('cochange', 'mail') using both algorithms", {

## Build the author network for the relations "cochange" and "mail" twice with 'simplify = TRUE':
## first without 'simplify.multiple.relations' being set, then with 'simplify.multiple.relations = TRUE'.
## Each built network is compared against a manually constructed expected network.

## configurations
proj.conf = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT)
proj.conf$update.value("commits.filter.base.artifact", FALSE)
net.conf = NetworkConf$new()
net.conf$update.values(updated.values = list(author.relation = c("cochange", "mail"), simplify = TRUE))

## construct objects
proj.data = ProjectData$new(project.conf = proj.conf)
network.builder = NetworkBuilder$new(project.data = proj.data, network.conf = net.conf)

## vertex attributes
## (all eight authors are passed as vertices of both expected networks, although only
## "Björn", "Olaf", "Karl", and "Thomas" occur in the expected edges below)
authors = data.frame(name = c("Björn", "Olaf", "Karl", "Thomas", "udo", "Fritz fritz@example.org", "georg", "Hans"),
kind = TYPE.AUTHOR,
type = TYPE.AUTHOR)


## ---------------------- simplify.multiple.relations == FALSE -------------------------- ##

## edge attributes
## (six expected edges: four "cochange" edges followed by two "mail" edges; every list attribute
## holds two entries per edge and every edge has weight 2; attributes that are specific to the
## other relation are filled with NA)
data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl", # cochange
"Björn", "Olaf"), # mail
comb.2. = c("Olaf", "Karl", "Thomas", "Thomas", # cochange
"Olaf", "Thomas")) # mail
data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45")),
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")),
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32")),
get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32")), # cochange
get.date.from.string(c("2016-07-12 15:58:40", "2016-07-12 15:58:50")),
get.date.from.string(c("2016-07-12 16:04:40", "2016-07-12 16:05:37"))) # mail
data$artifact.type = list(c("Feature", "Feature"), c("Feature", "Feature"),
c("Feature", "Feature"), c("Feature", "Feature"), # cochange
c("Mail", "Mail"), c("Mail", "Mail")) # mail
data$hash = list(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338"),
c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"),
c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526"),
c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"),
as.character(c(NA, NA)), as.character(c(NA, NA)))
data$file = list(c("test.c", "test.c"), c("test2.c", "test3.c"), c("test2.c", "test2.c"), c("test3.c", "test2.c"),
as.character(c(NA, NA)), as.character(c(NA, NA)))
data$artifact = list(c("A", "A"), c("Base_Feature", "Base_Feature"), c("Base_Feature", "Base_Feature"),
c("Base_Feature", "Base_Feature"), as.character(c(NA, NA)), as.character(c(NA, NA)))
data$weight = rep(2, 6)
data$type = rep(TYPE.EDGES.INTRA, 6)
data$relation = c(rep("cochange", 4), rep("mail", 2))
data$message.id = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)),
c("<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>",
"<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>"),
c("<65a1sf31sagd684dfv31@mail.gmail.com>",
"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>"))
data$thread = list(as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)), as.character(c(NA, NA)),
c("<thread-13#8>", "<thread-13#8>"), c("<thread-13#9>", "<thread-13#9>"))

## build expected network
network.expected = igraph::graph.data.frame(data, vertices = authors,
directed = net.conf$get.value("author.directed"))

## build simplified network
## ('simplify.multiple.relations' has not been set on the network configuration at this point;
## presumably its default value is FALSE, as the section title suggests -- TODO confirm)
network.built = network.builder$get.author.network()

assert.networks.equal(network.built, network.expected)


## ---------------------- simplify.multiple.relations == TRUE --------------------------- ##

## edge attributes
## (four expected edges, one per connected author pair: for the pairs "Björn"--"Olaf" and
## "Olaf"--"Thomas", the "cochange" entries are followed by the "mail" entries within the same
## edge, resulting in four entries per list attribute and weight 4; the remaining two pairs
## only contain "cochange" entries and keep weight 2)
data = data.frame(comb.1. = c("Björn", "Olaf", "Olaf", "Karl"),
comb.2. = c("Olaf", "Karl", "Thomas", "Thomas"))

data$date = list(get.date.from.string(c("2016-07-12 15:58:59", "2016-07-12 16:00:45", # cochange
"2016-07-12 15:58:40", "2016-07-12 15:58:50")), # mail
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:10")), # cochange
get.date.from.string(c("2016-07-12 16:05:41", "2016-07-12 16:06:32", # cochange
"2016-07-12 16:04:40", "2016-07-12 16:05:37")), # mail
get.date.from.string(c("2016-07-12 16:06:10", "2016-07-12 16:06:32"))) # cochange
data$artifact.type = list(c("Feature", "Feature", "Mail", "Mail"),
c("Feature", "Feature"),
c("Feature", "Feature", "Mail", "Mail"),
c("Feature", "Feature"))
data$hash = list(as.character(c("72c8dd25d3dd6d18f46e2b26a5f5b1e2e8dc28d0", "5a5ec9675e98187e1e92561e1888aa6f04faa338", NA, NA)),
c("3a0ed78458b3976243db6829f63eba3eead26774", "1143db502761379c2bfcecc2007fc34282e7ee61"),
as.character(c("3a0ed78458b3976243db6829f63eba3eead26774", "0a1a5c523d835459c42f33e863623138555e2526", NA, NA)),
c("1143db502761379c2bfcecc2007fc34282e7ee61", "0a1a5c523d835459c42f33e863623138555e2526"))
data$file = list(as.character(c("test.c", "test.c", NA, NA)), c("test2.c", "test3.c"),
as.character(c("test2.c", "test2.c", NA, NA)), c("test3.c", "test2.c"))
data$artifact = list(as.character(c("A", "A", NA, NA)), c("Base_Feature", "Base_Feature"),
as.character(c("Base_Feature", "Base_Feature", NA, NA)), c("Base_Feature", "Base_Feature"))
data$weight = c(4, 2, 4, 2)
data$type = rep(TYPE.EDGES.INTRA, 4)
## the relation attribute is a list here, as a single edge can comprise several relations
data$relation = list(c("cochange", "mail"), c("cochange"), c("cochange", "mail"), c("cochange"))
data$message.id = list(as.character(c(NA, NA, "<4cbaa9ef0802201124v37f1eec8g89a412dfbfc8383a@mail.gmail.com>",
"<6784529b0802032245r5164f984l342f0f0dc94aa420@mail.gmail.com>")),
as.character(c(NA, NA)),
as.character(c(NA, NA, "<65a1sf31sagd684dfv31@mail.gmail.com>",
"<9b06e8d20801220234h659c18a3g95c12ac38248c7e0@mail.gmail.com>")),
as.character(c(NA, NA)))
data$thread = list(as.character(c(NA, NA, "<thread-13#8>", "<thread-13#8>")),
as.character(c(NA, NA)),
as.character(c(NA, NA, "<thread-13#9>", "<thread-13#9>")),
as.character(c(NA, NA)))

## build expected network
network.expected = igraph::graph.data.frame(data, vertices = authors,
directed = net.conf$get.value("author.directed"))

## build simplified network
## (switch the network builder to 'simplify.multiple.relations = TRUE' before building the network again)
network.builder$update.network.conf(updated.values = list(simplify.multiple.relations = TRUE))
network.built = network.builder$get.author.network()

assert.networks.equal(network.built, network.expected)

})

test_that("Simplify multiple basic multi-relational networks", {

    ##
    ## Simplify networks with vertices connected by multi-relational edges
    ##

    ## create an artifact network and an author network, each consisting of two vertices
    ## that are connected by three "cochange" and three "mail" edges
    network.A =
        igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature")
    network.B =
        igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR)
    for (i in seq_len(3)) {
        network.A = igraph::add_edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "mail")
        network.A = igraph::add_edges(network.A, c("A", "B"), type = TYPE.EDGES.INTRA, relation = "cochange")
        network.B = igraph::add_edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "mail")
        network.B = igraph::add_edges(network.B, c("C", "D"), type = TYPE.EDGES.INTRA, relation = "cochange")
    }

    ## expected results of the relation-wise simplification: exactly one edge per relation
    network.A.expected = igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("A", "B", type = TYPE.ARTIFACT, kind = "feature") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "mail") +
        igraph::edges("A", "B", type = TYPE.EDGES.INTRA, relation = "cochange")
    network.B.expected = igraph::make_empty_graph(n = 0, directed = FALSE) +
        igraph::vertices("C", "D", type = TYPE.AUTHOR, kind = TYPE.AUTHOR) +
        igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "mail") +
        igraph::edges("C", "D", type = TYPE.EDGES.INTRA, relation = "cochange")
    networks = list(A = network.A, B = network.B)

    ## simplify networks without simplifying multiple relations into single edges
    networks.simplified = simplify.networks(networks, simplify.multiple.relations = FALSE)
    expect_length(networks.simplified, 2)
    expect_identical(names(networks.simplified), names(networks))
    assert.networks.equal(networks.simplified[["A"]], network.A.expected)
    assert.networks.equal(networks.simplified[["B"]], network.B.expected)

    ## simplify networks with simplifying multiple relations into single edges
    networks.simplified = simplify.networks(networks, simplify.multiple.relations = TRUE)
    expect_length(networks.simplified, 2)
    expect_identical(names(networks.simplified), names(networks))
    for (i in seq_along(networks.simplified)) {
        ## check the simplified network itself (and not a re-simplified copy of it), as a
        ## second simplification step could hide a wrong number of edges
        expect_identical(igraph::ecount(networks.simplified[[i]]), 1)
        expect_identical(igraph::E(networks.simplified[[i]])$type[[1]], "Unipartite")
        expect_identical(igraph::E(networks.simplified[[i]])$relation[[1]], c("cochange", "mail"))
    }
})


## / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / / /
## Merge -------------------------------------------------------------------
Expand Down Expand Up @@ -727,3 +920,24 @@ test_that("Get the data sources from a network with one relation", {

expect_identical(expected.data.sources, get.data.sources.from.relations(network), info = "data sources: mails")
})

test_that("Get the data sources from a network with multiple relations on a single edge", {

    ## project configuration with 'commits.filter.base.artifact' set to FALSE
    project.configuration = ProjectConf$new(CF.DATA, CF.SELECTION.PROCESS, CASESTUDY, ARTIFACT)
    project.configuration$update.value("commits.filter.base.artifact", FALSE)

    ## data object based on the project configuration
    project.data = ProjectData$new(project.conf = project.configuration)

    ## network builder that constructs author networks from the relations "mail" and "cochange"
    network.configuration = NetworkConf$new()
    builder = NetworkBuilder$new(project.data = project.data, network.conf = network.configuration)
    builder$update.network.conf(updated.values = list(author.relation = c("mail", "cochange")))

    ## build the author network and merge all edges between a vertex pair into a single edge,
    ## so that a single edge can carry multiple relations
    author.network = simplify.network(builder$get.author.network(), simplify.multiple.relations = TRUE)

    ## both data sources need to be detected
    data.sources.expected = c("commits", "mails")
    expect_identical(data.sources.expected, get.data.sources.from.relations(author.network),
                     info = "data sources: commits, mails")
})
Loading

0 comments on commit 1d3d1a3

Please sign in to comment.