From 92a02801db7b122f9343029caad2085bdc5dc3bb Mon Sep 17 00:00:00 2001
From: Liang Zhang <psychelzh@outlook.com>
Date: Fri, 26 Jan 2024 14:10:13 +0800
Subject: [PATCH] Use rownames instead of `id` attributes

Signed-off-by: Liang Zhang <psychelzh@outlook.com>
---
 R/factor_analysis.R        |  6 +++---
 R/prepare_data_neural.R    | 28 +++++++++++++++++-----------
 _scripts/confirm_factors.R | 10 +++++++++-
 _scripts/preproc_neural.R  |  8 ++++----
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/R/factor_analysis.R b/R/factor_analysis.R
index fa52000..0adc72d 100644
--- a/R/factor_analysis.R
+++ b/R/factor_analysis.R
@@ -142,10 +142,10 @@ prepare_model <- function(config, theory, col_ov, col_lv,
 }
 
 extract_latent_scores <- function(fit, data = NULL, id_cols_data = NULL) {
-  scores <- as_tibble(unclass(lavPredict(fit, data)))
+  scores <- lavPredict(fit, data)
   if (!is.null(data)) {
     id_cols_data <- substitute(id_cols_data) %||% quote(user_id)
-    scores <- bind_cols(select(data, {{ id_cols_data }}), scores)
+    rownames(scores) <- as.character(pull(data, {{ id_cols_data }}))
   }
   scores
 }
@@ -153,7 +153,7 @@ extract_latent_scores <- function(fit, data = NULL, id_cols_data = NULL) {
 # Special for g factor estimation ----
 prepare_config_vars <- function(num_vars_total, n_steps) {
   num_vars_base <- num_vars_total %/% n_steps
-  tibble(
+  tibble::tibble(
     num_vars = seq(num_vars_base, num_vars_total, num_vars_base),
     use_pairs = num_vars * 2 <= num_vars_total
   )
diff --git a/R/prepare_data_neural.R b/R/prepare_data_neural.R
index b033e1c..9790a53 100644
--- a/R/prepare_data_neural.R
+++ b/R/prepare_data_neural.R
@@ -1,9 +1,8 @@
 # functional connectivity data preparation ----
 prepare_data_fc <- function(ts) {
-  structure(
-    do.call(rbind, lapply(ts$data, calc_fc)),
-    id = pull(ts, user_id, name = subject)
-  )
+  fc <- do.call(rbind, lapply(ts$data, calc_fc))
+  rownames(fc) <- as.character(ts$user_id)
+  fc
 }
 
 prepare_ts_merged <- function(files) {
@@ -14,6 +13,7 @@ prepare_ts_merged <- function(files) {
     ) |>
     mutate(
       user_id = data.camp::users_id_mapping[subject],
+      .keep = "unused",
       .before = 1L
     )
 }
@@ -36,19 +36,24 @@ prepare_files_ts <- function(session, task, config, atlas) {
 
 # confounds data preparation ----
 compose_confounds_cpm <- function(users_demography, fd_mean) {
-  data <- users_demography |>
+  users_demography |>
     select(user_id, user_sex, user_age) |>
     inner_join(fd_mean, by = join_by(user_id)) |>
-    mutate(scanner = str_remove_all(subject, "\\d")) |>
+    mutate(
+      scanner = str_remove_all(
+        names(data.camp::users_id_mapping)[
+          match(user_id, data.camp::users_id_mapping)
+        ],
+        "\\d"
+      )
+    ) |>
     fastDummies::dummy_columns(
       "scanner",
       remove_first_dummy = TRUE,
       remove_selected_columns = TRUE
-    )
-  structure(
-    as.matrix(select(data, !c(user_id, subject))),
-    id = pull(data, user_id, name = subject)
-  )
+    ) |>
+    column_to_rownames("user_id") |>
+    as.matrix()
 }
 
 prepare_fd_mean <- function(confounds) {
@@ -70,6 +75,7 @@ prepare_data_confounds <- function(files) {
     ) |>
     mutate(
       user_id = data.camp::users_id_mapping[subject],
+      .keep = "unused",
       .before = 1L
     )
 }
diff --git a/_scripts/confirm_factors.R b/_scripts/confirm_factors.R
index f9cff5d..596d29f 100644
--- a/_scripts/confirm_factors.R
+++ b/_scripts/confirm_factors.R
@@ -139,7 +139,15 @@ list(
   tarchetypes::tar_combine(
     scores_factor,
     zutils::select_list(targets_cfa, starts_with("scores")),
-    command = bind_rows(!!!.x, .id = ".id") |>
+    command = list(!!!.x) |>
+      map(
+        \(x) {
+          unclass(x) |>
+            as_tibble(rownames = "user_id") |>
+            mutate(user_id = bit64::as.integer64(user_id))
+        }
+      ) |>
+      bind_rows(.id = ".id") |>
       zutils::separate_wider_dsv(
         ".id",
         c(names(hypers_model), names(hypers_config_dims)),
diff --git a/_scripts/preproc_neural.R b/_scripts/preproc_neural.R
index d7f08bf..71b6e37 100644
--- a/_scripts/preproc_neural.R
+++ b/_scripts/preproc_neural.R
@@ -5,7 +5,7 @@ tar_option_set(
   controller = if (Sys.info()["nodename"] == "shadow") {
     crew.cluster::crew_controller_sge(
       name = "fc",
-      workers = 40,
+      workers = 10,
       seconds_idle = 30
     )
   } else {
@@ -56,9 +56,9 @@ list(
     tar_target(
       subjs_keep_neural,
       bind_rows(expr_join_fd) |>
-        filter(all(fd <= 0.3), .by = c(user_id, subject)) |>
-        distinct(user_id, subject) |>
-        pull(user_id, name = subject)
+        filter(all(fd <= 0.3), .by = user_id) |>
+        pull(user_id) |>
+        unique()
     ),
     params_fmri_tasks |>
       dplyr::summarise(