From aed6ee98e8cb60363947ad3ec27abc1102e8abc3 Mon Sep 17 00:00:00 2001 From: KrystynaGrzesiak Date: Mon, 22 Jul 2024 11:21:22 +0200 Subject: [PATCH] adds continuous target, updates tests and fixes some small bugs --- R/sequences.R | 49 +++++++++++++------- kmerFilters_fork2.Rproj => kmerFilters.Rproj | 0 man/get_target_additive.Rd | 10 +++- man/get_target_interactions.Rd | 5 +- man/get_target_logic.Rd | 11 +++-- tests/testthat/test-sequences.R | 44 ++++++++++++++++-- 6 files changed, 94 insertions(+), 25 deletions(-) rename kmerFilters_fork2.Rproj => kmerFilters.Rproj (100%) diff --git a/R/sequences.R b/R/sequences.R index d27c477..7c565a3 100644 --- a/R/sequences.R +++ b/R/sequences.R @@ -198,6 +198,8 @@ generate_kmer_data <- function(n_seq, #' @param zero_weight a single value denoting the weight of no-motifs case. If #' \code{NULL}, then we sample the weight from the uniform distribution on the #' [-2, -1] interval. Default to \code{NULL}. +#' @param binary logical, indicating whether the produced target variable should +#' be binary or continuous. #' #' @return a binary vector of target variable sampled based on interaction model #' and provided/calculated probabilities. @@ -229,7 +231,8 @@ generate_kmer_data <- function(n_seq, #' @export get_target_interactions <- function(kmer_dat, - zero_weight = NULL) { + zero_weight = NULL, + binary = TRUE) { target <- attr(kmer_dat, "target") n_motifs <- length(attr(kmer_dat, "motifs_set")) @@ -254,9 +257,12 @@ get_target_interactions <- function(kmer_dat, target[target] <- target_weights[target] target[!target] <- zero_weight - probs <- exp(target)/(1 + exp(target)) - - rbinom_vec(probs) + if(binary) { + probs <- exp(target)/(1 + exp(target)) + rbinom_vec(probs) + } else { + target + } } #' Logistic regression response @@ -303,7 +309,8 @@ get_target_interactions <- function(kmer_dat, get_target_additive <- function(kmer_dat, weights = NULL, - zero_weight = NULL) { + zero_weight = NULL, + binary = TRUE) { if(length(weights) != length(attr(kmer_dat, "motifs_set")) & !is.null(weights)) stop("The length of weights vector should equal number of motifs!") @@ -322,9 +329,12 @@ get_target_additive <- function(kmer_dat, target[target] <- target_weights[target] target[!target] <- zero_weight - probs <- exp(target)/(1 + exp(target)) - - rbinom_vec(probs) + if(binary) { + probs <- exp(target)/(1 + exp(target)) + rbinom_vec(probs) + } else { + target + } } @@ -358,8 +368,9 @@ get_target_additive <- function(kmer_dat, #' Default to 3. #' #' @param expressions a matrix of binary variables corresponding to custom -#' logic expressions. It's dimension should be related to the length of -#' \code{weights} vector if it's provided. Default to \code{NULL}. +#' logic expressions. You can create them based on motifs. It's dimension should +#' be related to the length of \code{weights} vector if it's provided. Default +#' to \code{NULL}. If \code{NULL}, random logic expressions will be created. #' #' @details #' Here, we consider new variables, \eqn{L_1, \ldots, L_l} where each of them @@ -391,7 +402,8 @@ get_target_logic <- function(kmer_dat, weights = NULL, n_exp = NULL, max_exp_depth = NULL, - expressions = NULL) { + expressions = NULL, + binary = TRUE) { motifs_set <- attr(kmer_dat, "motifs_set") motifs_map <- attr(kmer_dat, "motifs_map") @@ -405,7 +417,7 @@ get_target_logic <- function(kmer_dat, if(is.null(max_exp_depth)) max_exp_depth <- min(length(motifs_set) - 1, 3) if(is.null(weights)) - weights <- runif(n_exp) + weights <- runif(ifelse(is.null(expressions), n_exp, ncol(expressions))) if(is.null(zero_weight)) zero_weight <- runif(1, -2, -1) @@ -430,16 +442,19 @@ get_target_logic <- function(kmer_dat, as.numeric(eval(parse(text = do.call(sprintf, expr_params)))) }) - } - target_weights <- expressions_motifs %*% weights + target_weights <- expressions_motifs %*% weights + } target[target] <- target_weights[target] target[!target] <- zero_weight - probs <- exp(target)/(1 + exp(target)) - - rbinom_vec(probs) + if(binary) { + probs <- exp(target)/(1 + exp(target)) + rbinom_vec(probs) + } else { + target + } } diff --git a/kmerFilters_fork2.Rproj b/kmerFilters.Rproj similarity index 100% rename from kmerFilters_fork2.Rproj rename to kmerFilters.Rproj diff --git a/man/get_target_additive.Rd b/man/get_target_additive.Rd index 0dab782..15abbf4 100644 --- a/man/get_target_additive.Rd +++ b/man/get_target_additive.Rd @@ -4,7 +4,12 @@ \alias{get_target_additive} \title{Logistic regression response} \usage{ -get_target_additive(kmer_dat, weights = NULL, zero_weight = NULL) +get_target_additive( + kmer_dat, + weights = NULL, + zero_weight = NULL, + binary = TRUE +) } \arguments{ \item{kmer_dat}{output of \code{\link{generate_kmer_data}}} @@ -21,6 +26,9 @@ calculated based on the formula provided in details section. Default to \item{zero_weight}{a single value denoting the weight of no-motifs case. If \code{NULL}, then we sample the weight from the uniform distribution on the [-2, -1] interval. Default to \code{NULL}.} + +\item{binary}{logical, indicating whether the produced target variable should +be binary or continuous.} } \value{ a binary vector of target variable sampled based on additive model. diff --git a/man/get_target_interactions.Rd b/man/get_target_interactions.Rd index f2ed315..faf3871 100644 --- a/man/get_target_interactions.Rd +++ b/man/get_target_interactions.Rd @@ -4,7 +4,7 @@ \alias{get_target_interactions} \title{Logistic regression with interactions} \usage{ -get_target_interactions(kmer_dat, zero_weight = NULL) +get_target_interactions(kmer_dat, zero_weight = NULL, binary = TRUE) } \arguments{ \item{kmer_dat}{output of \code{\link{generate_kmer_data}}} @@ -12,6 +12,9 @@ get_target_interactions(kmer_dat, zero_weight = NULL) \item{zero_weight}{a single value denoting the weight of no-motifs case. If \code{NULL}, then we sample the weight from the uniform distribution on the [-2, -1] interval. Default to \code{NULL}.} + +\item{binary}{logical, indicating whether the produced target variable should +be binary or continuous.} } \value{ a binary vector of target variable sampled based on interaction model diff --git a/man/get_target_logic.Rd b/man/get_target_logic.Rd index 39e5e1c..1191210 100644 --- a/man/get_target_logic.Rd +++ b/man/get_target_logic.Rd @@ -11,7 +11,8 @@ get_target_logic( weights = NULL, n_exp = NULL, max_exp_depth = NULL, - expressions = NULL + expressions = NULL, + binary = TRUE ) } \arguments{ @@ -39,8 +40,12 @@ when \code{random} equals \code{TRUE}.} Default to 3.} \item{expressions}{a matrix of binary variables corresponding to custom -logic expressions. It's dimension should be related to the length of -\code{weights} vector if it's provided. Default to \code{NULL}.} +logic expressions. You can create them based on motifs. It's dimension should +be related to the length of \code{weights} vector if it's provided. Default +to \code{NULL}. If \code{NULL}, random logic expressions will be created.} + +\item{binary}{logical, indicating whether the produced target variable should +be binary or continuous.} } \description{ This function samples target variable according to the logic regression diff --git a/tests/testthat/test-sequences.R b/tests/testthat/test-sequences.R index e5f1487..0e6ed71 100644 --- a/tests/testthat/test-sequences.R +++ b/tests/testthat/test-sequences.R @@ -116,6 +116,16 @@ test_that("Interaction model works", { c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L), get_target_interactions(results) ) + + expect_equal( + c(3.2669325505849, 3.2669325505849, 0.824915252858773, 3.2669325505849, + 0.824915252858773, 1.01497719204053, 0.745780731318519, + 2.05459328205325, 1.16902287956327, 1.77926696231589, + -1.69982326356694, -1.69982326356694, -1.69982326356694, + -1.69982326356694, -1.69982326356694, -1.69982326356694, + -1.69982326356694, -1.69982326356694, -1.69982326356694, + -1.69982326356694), get_target_interactions(results, binary = FALSE) + ) }) @@ -135,12 +145,18 @@ test_that("Additive model works", { 0L, 0L, 0L, 0L, 1L), get_target_additive(results) ) - #test if conditions work + expect_equal( + c(2.05890567833558, 2.05890567833558, 0.39565183990635), + get_target_additive(results, binary = FALSE)[1:3] + ) + expect_error(get_target_additive(results, weights = 0.1), "The length of weights vector should equal number of motifs!") }) + + test_that("Logic model works", { set.seed(1) @@ -155,18 +171,40 @@ test_that("Logic model works", { c(0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L), get_target_logic(results) ) + + expect_equal( + c(0.854367734864354, 0.854367734864354, 0.854367734864354, + 0.854367734864354), get_target_logic(results, binary = FALSE)[1:4] + ) + + expressions = matrix(rbinom(n_seq*2, 1, .5), nrow = n_seq) + + expect_error( + get_target_logic(results, expressions = expressions, weights = 1), + "You have to provide weight for each column" + ) + + expect_identical( + c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, + 0L, 0L, 0L, 0L, 0L), + get_target_logic(results, expressions = expressions, weights = c(1, 2)) + ) + + motifs <- generate_motifs(alph, 1, 1, 4, 6) + results <- generate_kmer_data(n_seq, sequence_length, alph, + motifs, n_injections = 1) + + expect_error(get_target_logic(results), "You need at least 2 motifs") }) test_that("rbinom_vec works", { set.seed(1) - probs <- runif(10) expect_identical(rbinom_vec(probs), c(0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L)) - expect_error(rbinom_vec(2),"Provided probabilities should be greater") })