Commit
attempting to implement insta
cosmikwolf committed Oct 18, 2023
1 parent 9d52ff4 commit 7beaca9
Showing 13 changed files with 226 additions and 259 deletions.
22 changes: 22 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

7 changes: 7 additions & 0 deletions Cargo.toml
@@ -52,3 +52,10 @@ tracing = "0.1.37"
tracing-error = "0.2.0"
tracing-subscriber = { version = "0.3.17", features = ["env-filter", "serde"] }
tui-input = { version = "0.8.0", features = ["serde"] }

[dev-dependencies]
insta = { version = "1.34.0", features = ["yaml", "toml", "json"] }

[profile.dev.package]
insta.opt-level = 3
similar.opt-level = 3
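
insta is a snapshot-testing crate: an assertion macro renders a value to text (or serializes it via the yaml/toml/json features enabled above), then diffs it against a snapshot file checked into the repository, which the companion cargo-insta tool can review and accept. The opt-level overrides under [profile.dev.package] follow insta's recommendation to compile insta and its similar diffing dependency with optimizations so snapshot diffs stay fast in debug builds. A minimal sketch of such a test, with a hypothetical render_greeting function standing in for real crate code:

#[cfg(test)]
mod snapshot_tests {
  // Hypothetical helper used only to illustrate the snapshot workflow.
  fn render_greeting(name: &str) -> String {
    format!("Hello, {}!", name)
  }

  #[test]
  fn greeting_snapshot() {
    // The first run records a .snap file under a snapshots/ directory;
    // later runs fail if the rendered output drifts from that snapshot.
    insta::assert_snapshot!(render_greeting("world"));
  }
}
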
140 changes: 72 additions & 68 deletions src/app/errors.rs
@@ -3,126 +3,130 @@ use std::fmt;

#[derive(Debug)]
pub enum ChunkifierError {
  IO(std::io::Error),
  Utf8(std::string::FromUtf8Error),
  Other(String),
}

#[derive(Debug)]
pub enum GPTConnectorError {
  Reqwest(reqwest::Error),
  OpenAI(OpenAIError),
  APIError(OpenAIError),
  Other(String),
}

#[derive(Debug)]
pub enum SessionManagerError {
  FileChunker(ChunkifierError),
  GPTConnector(GPTConnectorError),
  PdfExtractor(PdfExtractorError),
  FileNotFound(String),
  ReadError,
  IO(std::io::Error),
  Other(String),
}

#[derive(Debug)]
pub enum PdfExtractorError {
  IO(std::io::Error),
  Other(String),
}

impl fmt::Display for ChunkifierError {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      ChunkifierError::IO(err) => write!(f, "IO error: {}", err),
      ChunkifierError::Utf8(err) => write!(f, "UTF-8 conversion error: {}", err),
      ChunkifierError::Other(err) => write!(f, "Other error: {}", err),
    }
  }
}

impl fmt::Display for GPTConnectorError {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      GPTConnectorError::Reqwest(err) => write!(f, "Reqwest error: {}", err),
      GPTConnectorError::OpenAI(err) => write!(f, "OpenAI error: {}", err),
      GPTConnectorError::APIError(err) => write!(f, "API error: {}", err),
      GPTConnectorError::Other(err) => write!(f, "Other error: {}", err),
    }
  }
}

impl fmt::Display for SessionManagerError {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      SessionManagerError::FileChunker(err) => write!(f, "FileChunker error: {}", err),
      SessionManagerError::GPTConnector(err) => write!(f, "GPTConnector error: {}", err),
      SessionManagerError::PdfExtractor(err) => write!(f, "PdfExtractor error: {}", err),
      SessionManagerError::IO(err) => write!(f, "IO error: {}", err),
      SessionManagerError::Other(err) => write!(f, "Other error: {}", err),
      SessionManagerError::FileNotFound(file) => {
        write!(f, "Session file not found: {}", file)
      },
      SessionManagerError::ReadError => {
        write!(f, "Error reading the session file")
      },
    }
  }
}

impl fmt::Display for PdfExtractorError {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
    match self {
      PdfExtractorError::IO(err) => write!(f, "IO error: {}", err),
      PdfExtractorError::Other(err) => write!(f, "Other error: {}", err),
    }
  }
}

impl std::error::Error for GPTConnectorError {}
impl std::error::Error for SessionManagerError {}

impl From<std::io::Error> for SessionManagerError {
  fn from(err: std::io::Error) -> SessionManagerError {
    SessionManagerError::IO(err)
  }
}

impl From<ChunkifierError> for SessionManagerError {
  fn from(err: ChunkifierError) -> SessionManagerError {
    SessionManagerError::FileChunker(err)
  }
}

impl From<GPTConnectorError> for SessionManagerError {
  fn from(err: GPTConnectorError) -> SessionManagerError {
    SessionManagerError::GPTConnector(err)
  }
}

impl From<PdfExtractorError> for SessionManagerError {
  fn from(err: PdfExtractorError) -> SessionManagerError {
    SessionManagerError::PdfExtractor(err)
  }
}

impl From<OpenAIError> for GPTConnectorError {
  fn from(err: OpenAIError) -> GPTConnectorError {
    GPTConnectorError::OpenAI(err)
  }
}

impl From<reqwest::Error> for GPTConnectorError {
  fn from(err: reqwest::Error) -> GPTConnectorError {
    GPTConnectorError::Reqwest(err)
  }
}

impl From<std::io::Error> for ChunkifierError {
  fn from(err: std::io::Error) -> ChunkifierError {
    ChunkifierError::IO(err)
  }
}

impl From<std::io::Error> for PdfExtractorError {
  fn from(err: std::io::Error) -> PdfExtractorError {
    PdfExtractorError::IO(err)
  }
}
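
The From impls above are what make ? conversion work: a function returning Result<_, SessionManagerError> can call APIs that fail with std::io::Error, ChunkifierError, GPTConnectorError, or PdfExtractorError and let ? wrap the error into the right variant automatically. A small illustrative sketch (load_session is hypothetical, not part of this commit):

// Illustrative only: relies on the From<std::io::Error> impl above.
fn load_session(path: &str) -> Result<String, SessionManagerError> {
  // A failed read yields std::io::Error, which ? converts to SessionManagerError::IO.
  let raw = std::fs::read_to_string(path)?;
  if raw.trim().is_empty() {
    // Domain-level failures are still constructed explicitly.
    return Err(SessionManagerError::ReadError);
  }
  Ok(raw)
}
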
112 changes: 54 additions & 58 deletions src/app/tools/chunkifier.rs
@@ -158,7 +158,7 @@ fn chunkify_text(text: &str, tokens_per_chunk: usize) -> Vec<String> {
}

/// Check if the given file is a PDF.
-fn is_pdf_file(file_path: &PathBuf) -> bool {
+fn is_pdf_file(file_path: &Path) -> bool {
file_path.extension().and_then(|s| s.to_str()) == Some("pdf")
}

@@ -183,79 +183,75 @@ fn extract_file_text(file_path: &PathBuf) -> Result<String, ChunkifierError> {
fs::read_to_string(file_path).map_err(|_| ChunkifierError::Other("Failed to read text file".to_string()))
}
}

#[cfg(test)]
mod tests {
  use super::*;
  use std::fs::File;
  use std::io::Write;
  use tempfile::tempdir;

  // A test for categorize_input to verify that it does what it needs to do,
  // using fake text that contains a URL, a file path, and some plain text.
  // It should return an IngestData struct containing the full text, a list of URLs, and a list of file paths.
  #[test]
  fn test_parse_input() {
    let input = "https://www.google.com/ src/main.rs this is some text";
    let ingest_data = categorize_input(input).unwrap();
    assert_eq!(ingest_data.text, "https://www.google.com/ src/main.rs this is some text");
    assert_eq!(ingest_data.urls, vec!["https://www.google.com/"]);
    assert_eq!(ingest_data.file_paths, vec![PathBuf::from("src/main.rs")]);
  }

  #[test]
  fn test_chunkify_pdf_file() {
    let pdf_file_path = PathBuf::from("tests/data/NIST.SP.800-185.pdf");
    let chunks = chunkify_file(&pdf_file_path, 4).unwrap();

    // This will depend on the content of the PDF and the chunk size.
    // For the purpose of the test, check whether the first chunk contains some expected stub text.
    // Adjust the expected stub text based on the content of the PDF.
    let expected_text = "NIST Special Publication"; // Adjust this as necessary

    assert!(!chunks.is_empty());
    assert!(chunks[0].contains(expected_text), "Expected stub text not found in the first chunk.");

    // Print out the chunks for verification:
    for (i, chunk) in chunks.iter().enumerate() {
      println!("Chunk {}: {}", i + 1, chunk);
    }
  }

  #[test]
  fn test_chunkify_text_file() {
    let dir = tempdir().unwrap();
    let text_file_path = dir.path().join("test.txt");

    File::create(&text_file_path).unwrap().write_all(b"Hello, world!\nHow are you?\nThis is a test!").unwrap();

    let chunks = chunkify_file(&text_file_path, 4).unwrap();

    // Print out the chunks for verification:
    for (i, chunk) in chunks.iter().enumerate() {
      println!("Chunk {}: {}", i + 1, chunk);
    }
    assert_eq!(chunks.len(), 4);
    assert_eq!(chunks[0], "Hello, world!");
    assert_eq!(chunks[1], "How are you?");
    assert_eq!(chunks[2], "This is a");
    assert_eq!(chunks[3], "test!");
  }

  #[test]
  fn test_chunkify_binary_file() {
    let dir = tempdir().unwrap();
    let binary_file_path = dir.path().join("binary_test_file.bin");

    File::create(&binary_file_path).unwrap().write_all(&[0u8, 1, 2, 3, 4, 255]).unwrap();

    let result = chunkify_file(&binary_file_path, 4);

    // We expect an error, as the binary file is not processable.
    assert!(result.is_err());
  }
}
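
Given the commit's goal of wiring in insta, assert_eq! chains like the ones in test_chunkify_text_file are natural candidates for snapshot assertions. A sketch of what that test could become inside the tests module, assuming the chunk list itself is what gets snapshotted (this variant is not part of the commit):

#[test]
fn test_chunkify_text_file_snapshot() {
  let dir = tempfile::tempdir().unwrap();
  let text_file_path = dir.path().join("test.txt");
  std::fs::write(&text_file_path, "Hello, world!\nHow are you?\nThis is a test!").unwrap();

  let chunks = chunkify_file(&text_file_path, 4).unwrap();

  // Serializes the Vec<String> using the "yaml" feature enabled in Cargo.toml;
  // the expected chunks live in a reviewable .snap file instead of inline assert_eq! calls.
  insta::assert_yaml_snapshot!(chunks);
}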