From aed6ee98e8cb60363947ad3ec27abc1102e8abc3 Mon Sep 17 00:00:00 2001
From: KrystynaGrzesiak <krygrz11@gmail.com>
Date: Mon, 22 Jul 2024 11:21:22 +0200
Subject: [PATCH] adds continuous target, updates tests and fixes some small
 bugs

---
 R/sequences.R                                | 49 +++++++++++++-------
 kmerFilters_fork2.Rproj => kmerFilters.Rproj |  0
 man/get_target_additive.Rd                   | 10 +++-
 man/get_target_interactions.Rd               |  5 +-
 man/get_target_logic.Rd                      | 11 +++--
 tests/testthat/test-sequences.R              | 44 ++++++++++++++++--
 6 files changed, 94 insertions(+), 25 deletions(-)
 rename kmerFilters_fork2.Rproj => kmerFilters.Rproj (100%)

diff --git a/R/sequences.R b/R/sequences.R
index d27c477..7c565a3 100644
--- a/R/sequences.R
+++ b/R/sequences.R
@@ -198,6 +198,8 @@ generate_kmer_data <- function(n_seq,
 #' @param zero_weight a single value denoting the weight of no-motifs case. If
 #' \code{NULL}, then we sample the weight from the uniform distribution on the
 #' [-2, -1] interval. Default to \code{NULL}.
+#' @param binary logical, indicating whether the produced target variable should
+#' be binary or continuous.
 #'
 #' @return a binary vector of target variable sampled based on interaction model
 #' and provided/calculated probabilities.
@@ -229,7 +231,8 @@ generate_kmer_data <- function(n_seq,
 #' @export
 
 get_target_interactions <- function(kmer_dat,
-                                    zero_weight = NULL) {
+                                    zero_weight = NULL,
+                                    binary = TRUE) {
 
     target <- attr(kmer_dat, "target")
     n_motifs <- length(attr(kmer_dat, "motifs_set"))
@@ -254,9 +257,12 @@ get_target_interactions <- function(kmer_dat,
     target[target] <- target_weights[target]
     target[!target] <- zero_weight
 
-    probs <- exp(target)/(1 + exp(target))
-
-    rbinom_vec(probs)
+    if(binary) {
+        probs <- exp(target)/(1 + exp(target))
+        rbinom_vec(probs)
+    } else {
+        target
+    }
 }
 
 #' Logistic regression response
@@ -303,7 +309,8 @@ get_target_interactions <- function(kmer_dat,
 
 get_target_additive <- function(kmer_dat,
                                 weights = NULL,
-                                zero_weight = NULL) {
+                                zero_weight = NULL,
+                                binary = TRUE) {
 
     if(length(weights) != length(attr(kmer_dat, "motifs_set")) & !is.null(weights))
         stop("The length of weights vector should equal number of motifs!")
@@ -322,9 +329,12 @@ get_target_additive <- function(kmer_dat,
     target[target] <- target_weights[target]
     target[!target] <- zero_weight
 
-    probs <- exp(target)/(1 + exp(target))
-
-    rbinom_vec(probs)
+    if(binary) {
+        probs <- exp(target)/(1 + exp(target))
+        rbinom_vec(probs)
+    } else {
+        target
+    }
 }
 
 
@@ -358,8 +368,9 @@ get_target_additive <- function(kmer_dat,
 #' Default to 3.
 #'
 #' @param expressions a matrix of binary variables corresponding to custom
-#' logic expressions. It's dimension should be related to the length of
-#' \code{weights} vector if it's provided. Default to \code{NULL}.
+#' logic expressions. You can create them based on motifs. It's dimension should
+#' be related to the length of \code{weights} vector if it's provided. Default
+#' to \code{NULL}. If \code{NULL}, random logic expressions will be created.
 #'
 #' @details
 #' Here, we consider new variables, \eqn{L_1, \ldots, L_l} where each of them
@@ -391,7 +402,8 @@ get_target_logic <- function(kmer_dat,
                              weights = NULL,
                              n_exp = NULL,
                              max_exp_depth = NULL,
-                             expressions = NULL) {
+                             expressions = NULL,
+                             binary = TRUE) {
 
     motifs_set <- attr(kmer_dat, "motifs_set")
     motifs_map <- attr(kmer_dat, "motifs_map")
@@ -405,7 +417,7 @@ get_target_logic <- function(kmer_dat,
     if(is.null(max_exp_depth))
         max_exp_depth <- min(length(motifs_set) - 1, 3)
     if(is.null(weights))
-        weights <- runif(n_exp)
+        weights <- runif(ifelse(is.null(expressions), n_exp, ncol(expressions)))
     if(is.null(zero_weight))
         zero_weight <- runif(1, -2, -1)
 
@@ -430,16 +442,19 @@ get_target_logic <- function(kmer_dat,
 
             as.numeric(eval(parse(text = do.call(sprintf, expr_params))))
         })
-    }
 
-    target_weights <- expressions_motifs %*% weights
+        target_weights <- expressions_motifs %*% weights
+    }
 
     target[target] <- target_weights[target]
     target[!target] <- zero_weight
 
-    probs <- exp(target)/(1 + exp(target))
-
-    rbinom_vec(probs)
+    if(binary) {
+        probs <- exp(target)/(1 + exp(target))
+        rbinom_vec(probs)
+    } else {
+        target
+    }
 }
 
 
diff --git a/kmerFilters_fork2.Rproj b/kmerFilters.Rproj
similarity index 100%
rename from kmerFilters_fork2.Rproj
rename to kmerFilters.Rproj
diff --git a/man/get_target_additive.Rd b/man/get_target_additive.Rd
index 0dab782..15abbf4 100644
--- a/man/get_target_additive.Rd
+++ b/man/get_target_additive.Rd
@@ -4,7 +4,12 @@
 \alias{get_target_additive}
 \title{Logistic regression response}
 \usage{
-get_target_additive(kmer_dat, weights = NULL, zero_weight = NULL)
+get_target_additive(
+  kmer_dat,
+  weights = NULL,
+  zero_weight = NULL,
+  binary = TRUE
+)
 }
 \arguments{
 \item{kmer_dat}{output of \code{\link{generate_kmer_data}}}
@@ -21,6 +26,9 @@ calculated based on the formula provided in details section. Default to
 \item{zero_weight}{a single value denoting the weight of no-motifs case. If
 \code{NULL}, then we sample the weight from the uniform distribution on the
 [-2, -1] interval. Default to \code{NULL}.}
+
+\item{binary}{logical, indicating whether the produced target variable should
+be binary or continuous.}
 }
 \value{
 a binary vector of target variable sampled based on additive model.
diff --git a/man/get_target_interactions.Rd b/man/get_target_interactions.Rd
index f2ed315..faf3871 100644
--- a/man/get_target_interactions.Rd
+++ b/man/get_target_interactions.Rd
@@ -4,7 +4,7 @@
 \alias{get_target_interactions}
 \title{Logistic regression with interactions}
 \usage{
-get_target_interactions(kmer_dat, zero_weight = NULL)
+get_target_interactions(kmer_dat, zero_weight = NULL, binary = TRUE)
 }
 \arguments{
 \item{kmer_dat}{output of \code{\link{generate_kmer_data}}}
@@ -12,6 +12,9 @@ get_target_interactions(kmer_dat, zero_weight = NULL)
 \item{zero_weight}{a single value denoting the weight of no-motifs case. If
 \code{NULL}, then we sample the weight from the uniform distribution on the
 [-2, -1] interval. Default to \code{NULL}.}
+
+\item{binary}{logical, indicating whether the produced target variable should
+be binary or continuous.}
 }
 \value{
 a binary vector of target variable sampled based on interaction model
diff --git a/man/get_target_logic.Rd b/man/get_target_logic.Rd
index 39e5e1c..1191210 100644
--- a/man/get_target_logic.Rd
+++ b/man/get_target_logic.Rd
@@ -11,7 +11,8 @@ get_target_logic(
   weights = NULL,
   n_exp = NULL,
   max_exp_depth = NULL,
-  expressions = NULL
+  expressions = NULL,
+  binary = TRUE
 )
 }
 \arguments{
@@ -39,8 +40,12 @@ when \code{random} equals \code{TRUE}.}
 Default to 3.}
 
 \item{expressions}{a matrix of binary variables corresponding to custom
-logic expressions. It's dimension should be related to the length of
-\code{weights} vector if it's provided. Default to \code{NULL}.}
+logic expressions. You can create them based on motifs. It's dimension should
+be related to the length of \code{weights} vector if it's provided. Default
+to \code{NULL}. If \code{NULL}, random logic expressions will be created.}
+
+\item{binary}{logical, indicating whether the produced target variable should
+be binary or continuous.}
 }
 \description{
 This function samples target variable according to the logic regression
diff --git a/tests/testthat/test-sequences.R b/tests/testthat/test-sequences.R
index e5f1487..0e6ed71 100644
--- a/tests/testthat/test-sequences.R
+++ b/tests/testthat/test-sequences.R
@@ -116,6 +116,16 @@ test_that("Interaction model works", {
         c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L,
           1L, 1L, 0L, 0L, 0L), get_target_interactions(results)
     )
+
+    expect_equal(
+        c(3.2669325505849, 3.2669325505849, 0.824915252858773, 3.2669325505849,
+          0.824915252858773, 1.01497719204053, 0.745780731318519,
+          2.05459328205325, 1.16902287956327, 1.77926696231589,
+          -1.69982326356694, -1.69982326356694, -1.69982326356694,
+          -1.69982326356694, -1.69982326356694, -1.69982326356694,
+          -1.69982326356694, -1.69982326356694, -1.69982326356694,
+          -1.69982326356694), get_target_interactions(results, binary = FALSE)
+    )
 })
 
 
@@ -135,12 +145,18 @@ test_that("Additive model works", {
           0L, 0L, 0L, 0L, 1L), get_target_additive(results)
     )
 
-    #test if conditions work
+    expect_equal(
+        c(2.05890567833558, 2.05890567833558, 0.39565183990635),
+        get_target_additive(results, binary = FALSE)[1:3]
+    )
+
     expect_error(get_target_additive(results, weights = 0.1),
                  "The length of weights vector should equal number of motifs!")
 })
 
 
+
+
 test_that("Logic model works", {
 
     set.seed(1)
@@ -155,18 +171,40 @@ test_that("Logic model works", {
         c(0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L,
           1L, 0L, 0L, 0L, 1L), get_target_logic(results)
     )
+
+    expect_equal(
+        c(0.854367734864354, 0.854367734864354, 0.854367734864354,
+          0.854367734864354), get_target_logic(results, binary = FALSE)[1:4]
+    )
+
+    expressions = matrix(rbinom(n_seq*2, 1, .5), nrow = n_seq)
+
+    expect_error(
+        get_target_logic(results, expressions = expressions, weights = 1),
+        "You have to provide weight for each column"
+    )
+
+    expect_identical(
+        c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
+          0L, 0L, 0L, 0L, 0L),
+        get_target_logic(results, expressions = expressions, weights = c(1, 2))
+    )
+
+    motifs <- generate_motifs(alph, 1, 1, 4, 6)
+    results <- generate_kmer_data(n_seq, sequence_length, alph,
+                                  motifs, n_injections = 1)
+
+    expect_error(get_target_logic(results), "You need at least 2 motifs")
 })
 
 
 test_that("rbinom_vec works", {
 
     set.seed(1)
-
     probs <- runif(10)
 
     expect_identical(rbinom_vec(probs),
                      c(0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 0L))
-
     expect_error(rbinom_vec(2),"Provided probabilities should be greater")
 })