Skip to content

Commit

Permalink
Merge pull request #31 from COMBINE-lab/develop
Browse files Browse the repository at this point in the history
merge develop into main
  • Loading branch information
rob-p committed Jul 12, 2024
2 parents 72711ea + 5dd74da commit d871847
Show file tree
Hide file tree
Showing 18 changed files with 3,087 additions and 187 deletions.
23 changes: 14 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "libradicl"
version = "0.8.2"
version = "0.9.0"
authors = [
"Avi Srivastava <avi.srivastava@nyu.edu>",
"Hirak Sarkar <hirak_sarkar@hms.harvard.edu>",
Expand Down Expand Up @@ -28,15 +28,20 @@ categories = ["command-line-utilities", "science"]
[dependencies]
snap = "1"
scroll = "0.12.0"
num = "0.4.1"
ahash = "0.8.9"
serde = { version = "1.0.197", features = ["derive"] }
dashmap = "^5.5.3"
num = "0.4.3"
ahash = "0.8.11"
serde = { version = "1.0.204", features = ["derive"] }
dashmap = "^6.0.1"
bio-types = "1.0.1"
smallvec = "1.13.1"
noodles-bam = "0.56.0"
noodles-sam = "0.53.0"
anyhow = "1.0.80"
smallvec = "1.13.2"
noodles-bam = "0.64.0"
noodles-sam = "0.61.0"
anyhow = "1.0.86"
itertools = "0.13.0"
bytemuck = { version = "1.16.1", features = ["aarch64_simd"] }
derivative = "2.2.0"
crossbeam-queue = "0.3.11"

[dev-dependencies]
needletail="0.5.1"
indicatif="0.17.8"
26 changes: 23 additions & 3 deletions examples/read_chunk_bulk.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
use anyhow;
use anyhow::{self, Context};
use libradicl;
use libradicl::chunk::Chunk;
use libradicl::record::{PiscemBulkReadRecord, PiscemBulkRecordContext};
use std::io::BufReader;

fn main() -> anyhow::Result<()> {
let fname = std::env::args().nth(1).expect("input filename");
let fname = std::env::args().nth(1).context("missing input filename")?;
let f = std::fs::File::open(&fname)?;
let mut ifile = BufReader::new(f);

let p = libradicl::header::RadPrelude::from_bytes(&mut ifile)?;
if let Ok(summary) = p.summary(None) {
println!("{}", summary);
}

let _tag_map = p.file_tags.parse_tags_from_bytes_checked(&mut ifile)?;
let _tag_map = p.file_tags.try_parse_tags_from_bytes(&mut ifile)?;

// Any extra context we may need to parse the records. In this case, it's the
// size of the barcode and the umi.
Expand All @@ -27,6 +28,25 @@ fn main() -> anyhow::Result<()> {
for (i, r) in first_chunk.reads.iter().take(10).enumerate() {
println!("record {i}: {:?}", r);
}
println!("printed 10 records of {} in chunk", first_chunk.nrec);

let mut total_rec = first_chunk.nrec as usize;
let mut total_bytes = first_chunk.nbytes as usize;
let mut total_chunks = 1;

while total_chunks < p.hdr.num_chunks {
let chunk = Chunk::<PiscemBulkReadRecord>::from_bytes(&mut ifile, &tag_context);
total_rec += chunk.nrec as usize;
total_bytes += chunk.nbytes as usize;
total_chunks += 1;
if total_chunks % 500 == 0 {
println!(r"read {total_chunks} chunks");
}
}

println!(
r"read a total of {total_chunks} chunks, comprising {total_rec} records and {total_bytes} bytes."
);

Ok(())
}
30 changes: 27 additions & 3 deletions examples/read_chunk_single_cell.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
use anyhow;
use anyhow::{self, Context};
use libradicl::chunk::Chunk;
use libradicl::header;
use libradicl::record::{AlevinFryReadRecord, AlevinFryRecordContext};
use std::io::BufReader;

fn main() -> anyhow::Result<()> {
let fname = std::env::args().nth(1).expect("input filename");
let fname = std::env::args().nth(1).context("missing input filename")?;
let f = std::fs::File::open(&fname)?;
let mut ifile = BufReader::new(f);

let p = header::RadPrelude::from_bytes(&mut ifile)?;
if let Ok(summary) = p.summary(None) {
println!("{}", summary);
}

let tag_map = p.file_tags.parse_tags_from_bytes_checked(&mut ifile)?;
let tag_map = p.file_tags.try_parse_tags_from_bytes(&mut ifile)?;
println!("tag map {:?}\n", tag_map);

// Any extra context we may need to parse the records. In this case, it's the
// size of the barcode and the umi.
let tag_context = p.get_record_context::<AlevinFryRecordContext>()?;
Expand All @@ -27,6 +29,28 @@ fn main() -> anyhow::Result<()> {
for (i, r) in first_chunk.reads.iter().take(10).enumerate() {
println!("record {i}: {:?}", r);
}
println!(
"printed first 10 records of {} in the first chunk",
first_chunk.nrec
);

let mut total_rec = first_chunk.nrec as usize;
let mut total_bytes = first_chunk.nbytes as usize;
let mut total_chunks = 1;

while total_chunks < p.hdr.num_chunks {
let chunk = Chunk::<AlevinFryReadRecord>::from_bytes(&mut ifile, &tag_context);
total_rec += chunk.nrec as usize;
total_bytes += chunk.nbytes as usize;
total_chunks += 1;
if total_chunks % 500 == 0 {
println!(r"read {total_chunks} chunks");
}
}

println!(
r"read a total of {total_chunks} chunks, comprising {total_rec} records and {total_bytes} bytes."
);

Ok(())
}
81 changes: 81 additions & 0 deletions examples/read_chunk_single_cell_atac.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use libradicl::{
//header::RadPrelude,
//readers::ParallelChunkReader,
readers::ParallelRadReader,
record::AtacSeqReadRecord,
};
use std::fs::File;
use std::io::BufReader;
use std::num::NonZeroUsize;
use std::sync::atomic::Ordering;

/// Parses a RAD file of `AtacSeqReadRecord`s with a `ParallelRadReader` and a pool of
/// worker threads, then reports how many records the workers counted.
///
/// The input path is taken from the first command-line argument. When no argument is
/// supplied, the path originally hard-coded in this example
/// (`../piscem_atac_data/map.rad`) is used.
///
/// # Errors
/// Returns an error if the file cannot be opened, its metadata cannot be read, or
/// chunk parsing reports a failure.
///
/// # Panics
/// Panics if any worker thread panicked.
fn main() -> anyhow::Result<()> {
    const DEFAULT_INPUT: &str = "../piscem_atac_data/map.rad";
    const NWORKERS: usize = 12;

    // Let the caller pick the input; fall back to the historical default path.
    let fname = std::env::args()
        .nth(1)
        .unwrap_or_else(|| String::from(DEFAULT_INPUT));
    let f = File::open(&fname)
        .map_err(|e| anyhow::anyhow!("failed to open RAD file {fname}: {e}"))?;
    let file_len = f.metadata()?.len();
    let ifile = BufReader::new(f);

    let mut rad_reader = ParallelRadReader::<AtacSeqReadRecord, BufReader<File>>::new(
        ifile,
        NonZeroUsize::new(NWORKERS).expect("NWORKERS is a non-zero constant"),
    );

    if let Ok(summary) = rad_reader.prelude.summary(None) {
        println!("{}", summary);
    }
    println!("tag map {:?}\n", rad_reader.file_tag_map);
    println!("num chunks = {:?}\n", rad_reader.prelude.hdr.num_chunks());

    let mut handles = Vec::<std::thread::JoinHandle<usize>>::with_capacity(NWORKERS);
    for _ in 0..NWORKERS {
        let rd = rad_reader.is_done();
        let q = rad_reader.get_queue();
        let handle = std::thread::spawn(move || {
            let mut nrec_processed = 0_usize;
            loop {
                // Sample the done flag *before* draining. If the flag was already set,
                // the drain below sees every item pushed before it was set, so nothing
                // is left behind. Checking the flag only after an empty pop (as a plain
                // `while !done { drain }` does) can exit with items still queued.
                // NOTE(review): assumes the reader sets the flag only after its final
                // push — confirm against the reader implementation.
                let done = rd.load(Ordering::SeqCst);
                while let Some(meta_chunk) = q.pop() {
                    for c in meta_chunk.iter() {
                        nrec_processed += c.nrec as usize;
                    }
                }
                if done {
                    break;
                }
            }
            nrec_processed
        });
        handles.push(handle);
    }

    // Progress is tracked in bytes consumed past the header.
    let header_offset = rad_reader.get_byte_offset();
    // saturating_sub guards against underflow should the offset exceed the file length.
    let pbar = ProgressBar::new(file_len.saturating_sub(header_offset));
    pbar.set_style(
        ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
        )
        .expect("progress bar template is a valid constant")
        .progress_chars("##-"),
    );
    pbar.set_draw_target(ProgressDrawTarget::stderr_with_hz(5));
    let cb = |new_bytes: u64, _new_rec: u64| {
        pbar.inc(new_bytes);
    };

    // Propagate a parsing failure instead of discarding it; otherwise the example
    // would go on to report success.
    rad_reader.start_chunk_parsing(Some(cb))?;

    let mut total_processed = 0;
    for handle in handles {
        total_processed += handle.join().expect("The parsing thread panicked");
    }
    pbar.finish_with_message(format!(
        "finished parsing RAD file; processed {} total records\n",
        total_processed
    ));
    Ok(())
}
81 changes: 81 additions & 0 deletions examples/read_chunk_single_cell_parallel.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle};
use libradicl::{
//header::RadPrelude,
//readers::ParallelChunkReader,
readers::ParallelRadReader,
record::AlevinFryReadRecord,
};
use std::fs::File;
use std::io::BufReader;
use std::num::NonZeroUsize;
use std::sync::atomic::Ordering;

/// Parses a RAD file of `AlevinFryReadRecord`s with a `ParallelRadReader` and a pool of
/// worker threads, then reports how many records the workers counted.
///
/// The input path is taken from the first command-line argument. When no argument is
/// supplied, the path originally hard-coded in this example
/// (`../piscem_atac_data/map.rad`) is used.
/// NOTE(review): that default points at an ATAC data directory and looks copied from
/// the ATAC example — pass an explicit path, or confirm the intended default.
///
/// # Errors
/// Returns an error if the file cannot be opened, its metadata cannot be read, or
/// chunk parsing reports a failure.
///
/// # Panics
/// Panics if any worker thread panicked.
fn main() -> anyhow::Result<()> {
    const DEFAULT_INPUT: &str = "../piscem_atac_data/map.rad";
    const NWORKERS: usize = 12;

    // Let the caller pick the input; fall back to the historical default path.
    let fname = std::env::args()
        .nth(1)
        .unwrap_or_else(|| String::from(DEFAULT_INPUT));
    let f = File::open(&fname)
        .map_err(|e| anyhow::anyhow!("failed to open RAD file {fname}: {e}"))?;
    let file_len = f.metadata()?.len();
    let ifile = BufReader::new(f);

    let mut rad_reader = ParallelRadReader::<AlevinFryReadRecord, BufReader<File>>::new(
        ifile,
        NonZeroUsize::new(NWORKERS).expect("NWORKERS is a non-zero constant"),
    );

    if let Ok(summary) = rad_reader.prelude.summary(None) {
        println!("{}", summary);
    }
    println!("tag map {:?}\n", rad_reader.file_tag_map);
    println!("num chunks = {:?}\n", rad_reader.prelude.hdr.num_chunks());

    let mut handles = Vec::<std::thread::JoinHandle<usize>>::with_capacity(NWORKERS);
    for _ in 0..NWORKERS {
        let rd = rad_reader.is_done();
        let q = rad_reader.get_queue();
        let handle = std::thread::spawn(move || {
            let mut nrec_processed = 0_usize;
            loop {
                // Sample the done flag *before* draining. If the flag was already set,
                // the drain below sees every item pushed before it was set, so nothing
                // is left behind. Checking the flag only after an empty pop (as a plain
                // `while !done { drain }` does) can exit with items still queued.
                // NOTE(review): assumes the reader sets the flag only after its final
                // push — confirm against the reader implementation.
                let done = rd.load(Ordering::SeqCst);
                while let Some(meta_chunk) = q.pop() {
                    for c in meta_chunk.iter() {
                        nrec_processed += c.nrec as usize;
                    }
                }
                if done {
                    break;
                }
            }
            nrec_processed
        });
        handles.push(handle);
    }

    // Progress is tracked in bytes consumed past the header.
    let header_offset = rad_reader.get_byte_offset();
    // saturating_sub guards against underflow should the offset exceed the file length.
    let pbar = ProgressBar::new(file_len.saturating_sub(header_offset));
    pbar.set_style(
        ProgressStyle::with_template(
            "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
        )
        .expect("progress bar template is a valid constant")
        .progress_chars("##-"),
    );
    pbar.set_draw_target(ProgressDrawTarget::stderr_with_hz(5));
    let cb = |new_bytes: u64, _new_rec: u64| {
        pbar.inc(new_bytes);
    };

    // Propagate a parsing failure instead of discarding it; otherwise the example
    // would go on to report success.
    rad_reader.start_chunk_parsing(Some(cb))?;

    let mut total_processed = 0;
    for handle in handles {
        total_processed += handle.join().expect("The parsing thread panicked");
    }
    pbar.finish_with_message(format!(
        "finished parsing RAD file; processed {} total records\n",
        total_processed
    ));
    Ok(())
}
27 changes: 2 additions & 25 deletions examples/read_header.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,9 @@
use anyhow;
use anyhow::{self, Context};
use libradicl::{self, rad_types};
use std::io::BufReader;

fn main() -> anyhow::Result<()> {
let fname = std::env::args().nth(1).expect("input filename");
/*
{
let f = std::fs::File::open(&fname)?;
let mut ifile = BufReader::new(f);
let h = rad_types::RadHeader::from_bytes(&mut ifile)?;
if let Ok(summary) = h.summary(None) {
println!("{}", summary);
}
let ts = rad_types::TagSection::from_bytes(&mut ifile)?;
println!("File-level tags: {ts:?}");
let ts = rad_types::TagSection::from_bytes(&mut ifile)?;
println!("Read-level tags: {ts:?}");
let ts = rad_types::TagSection::from_bytes(&mut ifile)?;
println!("Alignment-level tags: {ts:?}");
}
*/

println!("\n");

let fname = std::env::args().nth(1).context("missing input filename")?;
{
let f = std::fs::File::open(&fname)?;
let mut ifile = BufReader::new(f);
Expand Down
Loading

0 comments on commit d871847

Please sign in to comment.