Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change ci approx to aki #5

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,9 @@ dev/false_positive_study.RData
dev/false_positive_study.pdf
dev/*rds
dev/*rda
dev/*csv
dev/*xlsx
dev/*RData
dev/*txt
dev/approximation_bias_correction_cache*
temp*
38 changes: 24 additions & 14 deletions R/ppcseq.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ identify_outliers = function(.data,
tol_rel_obj = 0.01,
just_discovery = F,
seed = sample(1:99999, size = 1),
adj_prob_theshold_2 = NULL
adj_prob_theshold_2 = NULL,
return_fit = FALSE
) {
# Prepare column same enquo
.sample = enquo(.sample)
Expand Down Expand Up @@ -281,23 +282,32 @@ identify_outliers = function(.data,
merge_results(

# Calculate CI 2 for discovery for plotting
res_discovery %>%
left_join(
(.) %>%
attr("fit") %>%
fit_to_counts_rng_approximated(adj_prob_theshold_2, how_many_posterior_draws_2, truncation_compensation = 0.7352941, cores) %>%
select(S, G, .lower_1 = .lower, .upper_1 = .upper)
),
res_test, formula,
res_discovery,

# Just used for article comparative analyses between 1 and 2 steps
# %>%
# left_join(
# (.) %>%
# attr("fit") %>%
# fit_to_counts_rng_approximated(adj_prob_theshold_2, how_many_posterior_draws_2, truncation_compensation = 0.7352941, cores) %>%
# select(S, G, .lower_1 = .lower, .upper_1 = .upper)
# ),

res_test,
formula,
!!.transcript,
!!.abundance,
!!.sample,
do_check_only_on_detrimental
) %>%

# Add fit attribute if any
add_attr(res_discovery %>% attr("fit"), "fit 1") %>%
add_attr(res_test %>% attr("fit"), "fit 2") %>%
# If return_fit
when(
return_fit ~ (.) %>%
add_attr(res_discovery %>% attr("fit"), "fit 1") %>%
add_attr(res_test %>% attr("fit"), "fit 2"),
~ (.)
) %>%

# Add total draws
add_attr(res_test %>% attr("total_draws"), "total_draws")
Expand Down Expand Up @@ -483,7 +493,7 @@ do_inference = function(my_df,
as_matrix() %>% t

# Get the matrix of the idexes of the outlier data points
# to explude from the model if it is the second passage
# to exclude from the model if it is the second passage
to_exclude_MPI = get_outlier_data_to_exlude(counts_MPI, to_exclude, shards)

# Package data
Expand Down Expand Up @@ -547,7 +557,7 @@ do_inference = function(my_df,

ifelse_pipe(
approximate_posterior_analysis,
~ .x %>% fit_to_counts_rng_approximated(adj_prob_theshold, how_many_posterior_draws * 10, truncation_compensation, cores),
~ .x %>% fit_to_counts_rng_approximated(adj_prob_theshold, how_many_posterior_draws * 10, truncation_compensation, cores, how_many_to_check),
~ .x %>% fit_to_counts_rng(adj_prob_theshold)
) %>%

Expand Down
149 changes: 110 additions & 39 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -448,8 +448,8 @@ merge_results = function(res_discovery, res_test, formula, .transcript, .abundan
!!.abundance,
!!.sample,
mean,
`.lower_1`,
`.upper_1`,
# `.lower_1`,
# `.upper_1`,
`exposure rate`,
slope_1 = slope,
one_of(parse_formula(formula))
Expand Down Expand Up @@ -525,8 +525,6 @@ format_results = function(.data, formula, .transcript, .abundance, .sample, do_c
)
}



#' Select only significant genes plus background for efficient normalisation
#'
#' @importFrom rstan sampling
Expand Down Expand Up @@ -564,7 +562,6 @@ select_to_check_and_house_keeping = function(.data, .do_check, .significance, .t
}
}


#' add_exposure_rate
#'
#' @importFrom tidyr separate
Expand Down Expand Up @@ -655,49 +652,123 @@ fit_to_counts_rng = function(fit, adj_prob_theshold){
#'
#' @export

fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_posterior_draws, truncation_compensation, cores){
fit_to_counts_rng_approximated = function(fit, adj_prob_theshold, how_many_posterior_draws, truncation_compensation, cores, how_many_to_check){

writeLines(sprintf("executing %s", "fit_to_counts_rng_approximated"))

draws_mu =
fit %>% extract("lambda_log_param") %>% `[[` (1) %>% as.data.frame() %>% setNames(sprintf("mu.%s", colnames(.))) %>%
as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, mu, -.draw) %>% separate(par, c("par", "S", "G"), sep="\\.") %>% select(-par)
draws_sigma =
fit %>% extract("sigma_raw") %>% `[[` (1) %>% as.data.frame() %>% setNames(sprintf("sigma.%s", colnames(.) %>% gsub("V", "", .))) %>%
as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, sigma, -.draw) %>% separate(par, c("par", "G"), sep="\\.") %>% select(-par)
draws_exposure =
fit %>% extract("exposure_rate") %>% `[[` (1) %>% as.data.frame() %>% setNames(sprintf("exposure.%s", colnames(.) %>% gsub("V", "", .))) %>%
as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, exposure, -.draw) %>% separate(par, c("par", "S"), sep="\\.") %>% select(-par)

draws_mu %>%
left_join(draws_sigma, by = c(".draw", "G")) %>%
left_join(draws_exposure, by = c(".draw", "S")) %>%
nest(data = -c(S, G)) %>%
draws_mu = fit %>% extract("lambda_log_param") %>% .[[1]] %>% .[,,1:how_many_to_check]

# %>% as.data.frame() %>% setNames(sprintf("mu.%s", colnames(.))) %>%
# as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, mu, -.draw) %>% separate(par, c("par", "S", "G"), sep="\\.") %>% select(-par)

draws_sigma = fit %>% extract("sigma_raw") %>% .[[1]] %>% .[,1:how_many_to_check]

# %>% as.data.frame() %>% setNames(sprintf("sigma.%s", colnames(.) %>% gsub("V", "", .))) %>%
# as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, sigma, -.draw) %>% separate(par, c("par", "G"), sep="\\.") %>% select(-par)

draws_exposure = fit %>% extract("exposure_rate") %>% .[[1]]

# %>% as.data.frame() %>% setNames(sprintf("exposure.%s", colnames(.) %>% gsub("V", "", .))) %>%
# as_tibble() %>% mutate(.draw = 1:n()) %>% gather(par, exposure, -.draw) %>% separate(par, c("par", "S"), sep="\\.") %>% select(-par)

expand_grid(S = 1:(dim(draws_mu)[2]), G = 1:(dim(draws_mu)[3])) %>%
mutate(truncation_compensation = !!truncation_compensation) %>%
mutate(
CI = map(
data,
~ {
.x_supersampled = .x %>% sample_n(how_many_posterior_draws, replace = T)
draws = rnbinom(n =how_many_posterior_draws, mu = exp(.x_supersampled$mu + .x_supersampled$exposure), size = 1/exp(.x_supersampled$sigma) * truncation_compensation )
draws %>%
# Process quantile
quantile(c(adj_prob_theshold, 1 - adj_prob_theshold)) %>%
tibble::as_tibble(rownames="prop") %>%
tidyr::spread(prop, value) %>%
setNames(c(".lower", ".upper")) %>%
# Add mean and sd
dplyr::mutate(mean = mean(draws), sd = sd(draws))
}
CI = pmap(
list( S,G, truncation_compensation),
~ tibble(
mu = draws_mu[,..1, ..2],
sigma = draws_sigma[,..2] ,
exposure = draws_exposure[,..1],
truncation_compensation = ..3
) %>%
get_CI_semi_analytically_pnbinom( adj_prob_theshold ) %>%

# Add mean and sd
dplyr::mutate(
mean = mean(exp(draws_mu[,..1, ..2] + draws_exposure[,..1])),
sd = mean(1/exp(draws_sigma[,..2]) * truncation_compensation)
)
)
) %>%
select(-data) %>%
unnest(CI) %>%

# Adapt to old dataset
mutate(.variable = "counts_rng") %>%
mutate(S = as.integer(S), G = as.integer(G))
unnest(CI)

# draws_mu %>%
# left_join(draws_sigma, by = c(".draw", "G")) %>%
# left_join(draws_exposure, by = c(".draw", "S")) %>%
# nest(data = -c(S, G)) %>%
# mutate(
# CI = future_map(
# data,
# ~ {
# get_CI_semi_analytically_pnbinom(
# .x,
# adj_prob_theshold
# ) %>%
# # Add mean and sd
# dplyr::mutate(
# mean = mean(exp(.x$mu + .x$exposure)),
# sd = mean(1/exp(.x$sigma) * truncation_compensation)
# )
# }
# )
# ) %>%
# select(-data) %>%
# unnest(CI) %>%
#
# # Adapt to old dataset
# mutate(.variable = "counts_rng") %>%
# mutate(S = as.integer(S), G = as.integer(G))

}

get_CI_semi_analytically_pnbinom_core = function(.x, .quantile){
ab<-range(
qnbinom(
.quantile,
mu = exp(.x$mu + .x$exposure),
size = 1/exp(.x$sigma) * .x$truncation_compensation
)
);

if(sum(ab) == 0) return(0);

opt<-optim(
mean(ab),
function (x)
sapply(
x,
function(p)
((.quantile)-mean(pnbinom(p, mu = exp(.x$mu + .x$exposure),
size = 1/exp(.x$sigma) * .x$truncation_compensation )))^2
),
method='Brent',
lower=ab[1],
upper=ab[2]
)
opt$par
}

get_CI_semi_analytically_pnbinom = function(.x, .quantile){

tibble(
.lower = .x %>% get_CI_semi_analytically_pnbinom_core(.quantile),
.upper = .x %>% get_CI_semi_analytically_pnbinom_core(1-.quantile)
)


}

get_CI_semi_analytically_rnbinom = function(.x, .quantile, how_many_posterior_draws){
.x_supersampled = .x %>% sample_n(how_many_posterior_draws, replace = T)
draws = rnbinom(n =how_many_posterior_draws, mu = exp(.x_supersampled$mu + .x_supersampled$exposure), size = 1/exp(.x_supersampled$sigma) * .x$truncation_compensation )
draws %>%
# Process quantile
quantile(c(.quantile, 1 - .quantile)) %>%
tibble::as_tibble(rownames="prop") %>%
tidyr::spread(prop, value) %>%
setNames(c(".lower", ".upper"))
}

save_generated_quantities_in_case = function(.data, fit, save_generated_quantities){
Expand Down
1 change: 1 addition & 0 deletions inst/stan/negBinomial_MPI.stan
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ functions{
}

matrix merge_coefficients(row_vector intercept, row_vector alpha_sub_1, matrix alpha_2, int C, int S, int G){
// Here I build the coefficient matrix appending the house keeping genes
matrix[C,G] my_alpha;


Expand Down