From 92a02801db7b122f9343029caad2085bdc5dc3bb Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Fri, 26 Jan 2024 14:10:13 +0800 Subject: [PATCH] Use rownames instead of `id` attributes Signed-off-by: Liang Zhang --- R/factor_analysis.R | 6 +++--- R/prepare_data_neural.R | 28 +++++++++++++++++----------- _scripts/confirm_factors.R | 10 +++++++++- _scripts/preproc_neural.R | 8 ++++---- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/R/factor_analysis.R b/R/factor_analysis.R index fa52000..0adc72d 100644 --- a/R/factor_analysis.R +++ b/R/factor_analysis.R @@ -142,10 +142,10 @@ prepare_model <- function(config, theory, col_ov, col_lv, } extract_latent_scores <- function(fit, data = NULL, id_cols_data = NULL) { - scores <- as_tibble(unclass(lavPredict(fit, data))) + scores <- lavPredict(fit, data) if (!is.null(data)) { id_cols_data <- substitute(id_cols_data) %||% quote(user_id) - scores <- bind_cols(select(data, {{ id_cols_data }}), scores) + rownames(scores) <- as.character(pull(data, {{ id_cols_data }})) } scores } @@ -153,7 +153,7 @@ extract_latent_scores <- function(fit, data = NULL, id_cols_data = NULL) { # Special for g factor estimation ---- prepare_config_vars <- function(num_vars_total, n_steps) { num_vars_base <- num_vars_total %/% n_steps - tibble( + tibble::tibble( num_vars = seq(num_vars_base, num_vars_total, num_vars_base), use_pairs = num_vars * 2 <= num_vars_total ) diff --git a/R/prepare_data_neural.R b/R/prepare_data_neural.R index b033e1c..9790a53 100644 --- a/R/prepare_data_neural.R +++ b/R/prepare_data_neural.R @@ -1,9 +1,8 @@ # functional connectivity data preparation ---- prepare_data_fc <- function(ts) { - structure( - do.call(rbind, lapply(ts$data, calc_fc)), - id = pull(ts, user_id, name = subject) - ) + fc <- do.call(rbind, lapply(ts$data, calc_fc)) + rownames(fc) <- as.character(ts$user_id) + fc } prepare_ts_merged <- function(files) { @@ -14,6 +13,7 @@ prepare_ts_merged <- function(files) { ) |> mutate( user_id = data.camp::users_id_mapping[subject], + .keep = "unused", .before = 1L ) } @@ -36,19 +36,24 @@ prepare_files_ts <- function(session, task, config, atlas) { # confounds data preparation ---- compose_confounds_cpm <- function(users_demography, fd_mean) { - data <- users_demography |> + users_demography |> select(user_id, user_sex, user_age) |> inner_join(fd_mean, by = join_by(user_id)) |> - mutate(scanner = str_remove_all(subject, "\\d")) |> + mutate( + scanner = str_remove_all( + names(data.camp::users_id_mapping)[ + match(user_id, data.camp::users_id_mapping) + ], + "\\d" + ) + ) |> fastDummies::dummy_columns( "scanner", remove_first_dummy = TRUE, remove_selected_columns = TRUE - ) - structure( - as.matrix(select(data, !c(user_id, subject))), - id = pull(data, user_id, name = subject) - ) + ) |> + column_to_rownames("user_id") |> + as.matrix() } prepare_fd_mean <- function(confounds) { @@ -70,6 +75,7 @@ prepare_data_confounds <- function(files) { ) |> mutate( user_id = data.camp::users_id_mapping[subject], + .keep = "unused", .before = 1L ) } diff --git a/_scripts/confirm_factors.R b/_scripts/confirm_factors.R index f9cff5d..596d29f 100644 --- a/_scripts/confirm_factors.R +++ b/_scripts/confirm_factors.R @@ -139,7 +139,15 @@ list( tarchetypes::tar_combine( scores_factor, zutils::select_list(targets_cfa, starts_with("scores")), - command = bind_rows(!!!.x, .id = ".id") |> + command = list(!!!.x) |> + map( + \(x) { + unclass(x) |> + as_tibble(rownames = "user_id") |> + mutate(user_id = bit64::as.integer64(user_id)) + } + ) |> + bind_rows(.id = ".id") |> zutils::separate_wider_dsv( ".id", c(names(hypers_model), names(hypers_config_dims)), diff --git a/_scripts/preproc_neural.R b/_scripts/preproc_neural.R index d7f08bf..71b6e37 100644 --- a/_scripts/preproc_neural.R +++ b/_scripts/preproc_neural.R @@ -5,7 +5,7 @@ tar_option_set( controller = if (Sys.info()["nodename"] == "shadow") { crew.cluster::crew_controller_sge( name = "fc", - workers = 40, + workers = 10, seconds_idle = 30 ) } else { @@ -56,9 +56,9 @@ list( tar_target( subjs_keep_neural, bind_rows(expr_join_fd) |> - filter(all(fd <= 0.3), .by = c(user_id, subject)) |> - distinct(user_id, subject) |> - pull(user_id, name = subject) + filter(all(fd <= 0.3), .by = user_id) |> + pull(user_id) |> + unique() ), params_fmri_tasks |> dplyr::summarise(