diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml.txt similarity index 100% rename from .github/workflows/pkgdown.yml rename to .github/workflows/pkgdown.yml.txt diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml.txt similarity index 100% rename from .github/workflows/pre-commit.yml rename to .github/workflows/pre-commit.yml.txt diff --git a/R/personalis.R b/R/personalis.R index a5d25b8..59b3a46 100644 --- a/R/personalis.R +++ b/R/personalis.R @@ -52,7 +52,7 @@ read_personalis <- function(pathlist) { # #' Read in gene expression data from personalis folders -#' @param sample_list A vector of paths to personalis folders +#' @param sample_paths A vector of paths to personalis folders #' @return SummarizedExperiment #' @export read_personalis_gene_expression <- function(sample_paths) { @@ -95,8 +95,28 @@ read_personalis_gene_expression_sample <- function(sample_folder) { # -------------------- SMALL VARIANTS -------------------- # -#' Read in small variant data from personalis folders +#' Read in small variant data from personalis folder +#' We only read in the "Preferred Transcript" report here: +#' +#' Preferred Transcript: The RefSeq accession.version for the transcript used for variant analysis. +#' Personalis uses a curated list of transcripts, which is based on the number of +#' times a transcript (accession.version) is referred to in COSMIC. If not present in +#' COSMIC, the default transcript would be the one corresponding to the longest CDS. +#' (source: Personalis Analysis_Pipeline_Documentation) +#' +#' We also do not read the `cancer_clinical_research`, the `cancer_research` and the `lowpopulationfreq` reports, +#' because they are subsets of the full report. +#' +#' In addition, Personalis also provides raw VCF files with all (unfiltered) variants. We currently don't +#' read them in because without additional filtering they are not very useful. 
If you need this level of information, +#' feel free to start from the raw vcf files or even run your own variant calling based on the FASTQ files. +#' #' @param sample_paths A vector of paths to personalis folders +#' @param sample_type Can be one or multiple of 'tumor', 'normal', or 'somatic'. +#' 'tumor' refers to tumor sample vs. genome reference (i.e. somatic+germline mutations), +#' 'normal' refers to normal sample vs. genome reference (i.e. germline mutations) and +#' 'somatic' refers to tumor vs. normal (i.e. somatic mutations only). +#' @param modality modality from which the variants were called. Can be either 'DNA' or 'RNA' #' @return SummarizedExperiment #' @importFrom dplyr select #' @importFrom purrr map @@ -252,6 +272,7 @@ read_personalis_somatic_variants_summary_statistics <- function(sample_folder, m #' Read Personalis CNV data for a list of samples #' @return SummarizedExperiment +#' @param sample_paths List of directories with Personalis samples #' @importFrom purrr map #' @export read_personalis_cnv_reports <- function(sample_paths) { diff --git a/man/read_personalis_cnv_reports.Rd b/man/read_personalis_cnv_reports.Rd index c615234..5d48637 100644 --- a/man/read_personalis_cnv_reports.Rd +++ b/man/read_personalis_cnv_reports.Rd @@ -6,6 +6,9 @@ \usage{ read_personalis_cnv_reports(sample_paths) } +\arguments{ +\item{sample_paths}{List of directories with Personalis samples} +} \value{ SummarizedExperiment } diff --git a/man/read_personalis_gene_expression.Rd b/man/read_personalis_gene_expression.Rd index 0f57891..389a8a1 100644 --- a/man/read_personalis_gene_expression.Rd +++ b/man/read_personalis_gene_expression.Rd @@ -7,7 +7,7 @@ read_personalis_gene_expression(sample_paths) } \arguments{ -\item{sample_list}{A vector of paths to personalis folders} +\item{sample_paths}{A vector of paths to personalis folders} } \value{ SummarizedExperiment diff --git a/man/read_personalis_small_variant_reports.Rd b/man/read_personalis_small_variant_reports.Rd 
index 298aa55..085c7e2 100644 --- a/man/read_personalis_small_variant_reports.Rd +++ b/man/read_personalis_small_variant_reports.Rd @@ -2,16 +2,36 @@ % Please edit documentation in R/personalis.R \name{read_personalis_small_variant_reports} \alias{read_personalis_small_variant_reports} -\title{Read in small variant data from personalis folders} +\title{Read in small variant data from personalis folder +We only read in the "Preferred Transcript" report here:} \usage{ read_personalis_small_variant_reports(sample_paths, modality, sample_type) } \arguments{ \item{sample_paths}{A vector of paths to personalis folders} + +\item{modality}{modality from which the variants were called. Can be either 'DNA' or 'RNA'} + +\item{sample_type}{Can be one or multiple of 'tumor', 'normal', or 'somatic'. +'tumor' refers to tumor sample vs. genome reference (i.e. somatic+germline mutations), +'normal' refers to normal sample vs. genome reference (i.e. germline mutations) and +'somatic' refers to tumor vs. normal (i.e. somatic mutations only).} } \value{ SummarizedExperiment } \description{ -Read in small variant data from personalis folders +Preferred Transcript: The RefSeq accession.version for the transcript used for variant analysis. +Personalis uses a curated list of transcripts, which is based on the number of +times a transcript (accession.version) is referred to in COSMIC. If not present in +COSMIC, the default transcript would be the one corresponding to the longest CDS. +(source: Personalis Analysis_Pipeline_Documentation) +} +\details{ +We also do not read the \code{cancer_clinical_research}, the \code{cancer_research} and the \code{lowpopulationfreq} reports, +because they are subsets of the full report. + +In addition, Personalis also provides raw VCF files with all (unfiltered) variants. We currently don't +read them in because without additional filtering they are not very useful. 
If you need this level of information, +feel free to start from the raw vcf files or even run your own variant calling based on the FASTQ files. }