Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix clippy warnings #253

7 changes: 5 additions & 2 deletions .github/workflows/rust-lint-fmt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ env:
jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Rustfmt
run: cargo fmt -- --check --files-with-diff
run: cargo fmt -- --check
- name: Run Clippy
run: cargo clippy --all-targets --all-features
env:
RUSTFLAGS: "-Dwarnings" # fail on warning
22 changes: 12 additions & 10 deletions sudachi-cli/src/analysis.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -51,14 +51,14 @@ impl<'a> Analysis for SplitSentencesOnly<'a> {
}
}

pub struct AnalyzeNonSplitted<D: DictionaryAccess, O: SudachiOutput<D>> {
output: O,
pub struct AnalyzeNonSplitted<D: DictionaryAccess, O: SudachiOutput<D> + ?Sized> {
output: Box<O>,
analyzer: StatefulTokenizer<D>,
morphemes: MorphemeList<D>,
}

impl<D: DictionaryAccess + Clone, O: SudachiOutput<D>> AnalyzeNonSplitted<D, O> {
pub fn new(output: O, dict: D, mode: Mode, enable_debug: bool) -> Self {
impl<D: DictionaryAccess + Clone, O: SudachiOutput<D> + ?Sized> AnalyzeNonSplitted<D, O> {
pub fn new(output: Box<O>, dict: D, mode: Mode, enable_debug: bool) -> Self {
Self {
output,
morphemes: MorphemeList::empty(dict.clone()),
Expand All @@ -67,7 +67,7 @@ impl<D: DictionaryAccess + Clone, O: SudachiOutput<D>> AnalyzeNonSplitted<D, O>
}
}

impl<D: DictionaryAccess, O: SudachiOutput<D>> Analysis for AnalyzeNonSplitted<D, O> {
impl<D: DictionaryAccess, O: SudachiOutput<D> + ?Sized> Analysis for AnalyzeNonSplitted<D, O> {
fn analyze(&mut self, input: &str, writer: &mut Writer) {
self.analyzer.reset().push_str(input);
self.analyzer
Expand All @@ -86,21 +86,23 @@ impl<D: DictionaryAccess, O: SudachiOutput<D>> Analysis for AnalyzeNonSplitted<D
}
}

pub struct AnalyzeSplitted<'a, D: DictionaryAccess + 'a, O: SudachiOutput<&'a D>> {
pub struct AnalyzeSplitted<'a, D: DictionaryAccess + 'a, O: SudachiOutput<&'a D> + ?Sized> {
splitter: SentenceSplitter<'a>,
inner: AnalyzeNonSplitted<&'a D, O>,
}

impl<'a, D: DictionaryAccess + 'a, O: SudachiOutput<&'a D>> AnalyzeSplitted<'a, D, O> {
pub fn new(output: O, dict: &'a D, mode: Mode, enable_debug: bool) -> Self {
impl<'a, D: DictionaryAccess + 'a, O: SudachiOutput<&'a D> + ?Sized> AnalyzeSplitted<'a, D, O> {
pub fn new(output: Box<O>, dict: &'a D, mode: Mode, enable_debug: bool) -> Self {
Self {
inner: AnalyzeNonSplitted::new(output, dict, mode, enable_debug),
splitter: SentenceSplitter::new().with_checker(dict.lexicon()),
}
}
}

impl<'a, D: DictionaryAccess + 'a, O: SudachiOutput<&'a D>> Analysis for AnalyzeSplitted<'a, D, O> {
impl<'a, D: DictionaryAccess + 'a, O: SudachiOutput<&'a D> + ?Sized> Analysis
for AnalyzeSplitted<'a, D, O>
{
fn analyze(&mut self, input: &str, writer: &mut Writer) {
for (_, sent) in self.splitter.split(input) {
self.inner.analyze(sent, writer);
Expand Down
8 changes: 4 additions & 4 deletions sudachi-cli/src/build.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -172,12 +172,12 @@ fn output_file(p: &Path) -> File {
OpenOptions::new()
.write(true)
.create_new(true)
.open(&p)
.open(p)
.unwrap_or_else(|e| panic!("failed to open {:?} for writing:\n{:?}", p, e))
}

fn dump_part(dict: PathBuf, part: String, output: PathBuf) {
let file = File::open(&dict).expect("open failed");
let file = File::open(dict).expect("open failed");
let data = unsafe { Mmap::map(&file) }.expect("mmap failed");
let loader =
unsafe { DictionaryLoader::read_any_dictionary(&data) }.expect("failed to load dictionary");
Expand Down Expand Up @@ -215,7 +215,7 @@ fn dump_matrix<W: Write>(grammar: &Grammar, w: &mut W) {
for left in 0..conn.num_left() {
for right in 0..conn.num_right() {
let cost = conn.cost(left as _, right as _);
write!(w, "{} {} {}\n", left, right, cost).unwrap();
writeln!(w, "{} {} {}", left, right, cost).unwrap();
}
}
}
Expand Down
69 changes: 36 additions & 33 deletions sudachi-cli/src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,29 +27,22 @@ use clap::Parser;

use crate::analysis::{Analysis, AnalyzeNonSplitted, AnalyzeSplitted, SplitSentencesOnly};
use crate::build::{build_main, is_build_mode, BuildCli};
use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::Config;
use sudachi::dic::dictionary::JapaneseDictionary;
use sudachi::prelude::*;

#[cfg(feature = "bake_dictionary")]
const BAKED_DICTIONARY_BYTES: &[u8] = include_bytes!(env!("SUDACHI_DICT_PATH"));

#[derive(Clone, Debug, Eq, PartialEq)]
#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)]
pub enum SentenceSplitMode {
/// Do both sentence splitting and analysis
#[default]
Default,
/// Do only sentence splitting and not analysis
Only,
/// Do only analysis without sentence splitting
None,
}

impl Default for SentenceSplitMode {
fn default() -> Self {
SentenceSplitMode::Default
}
}

impl FromStr for SentenceSplitMode {
type Err = &'static str;

Expand Down Expand Up @@ -122,16 +115,33 @@ struct Cli {
command: Option<BuildCli>,
}

// We want to instantiate a different concrete type for each output format.
// `$f` is a function that receives the chosen output object, so it is invoked with a different actual type in each branch.
macro_rules! with_output {
($cli: expr, $f: expr) => {
if $cli.wakati {
Box::new($f(output::Wakachi::default()))
} else {
Box::new($f(output::Simple::new($cli.print_all)))
}
};
/// Creates the boxed output formatter selected by the given flags.
///
/// Returns an `output::Wakachi` formatter when `wakachi` is `true`;
/// otherwise returns an `output::Simple` formatter constructed with `print_all`.
pub fn setup_output<D: DictionaryAccess>(
    wakachi: bool,
    print_all: bool,
) -> Box<dyn output::SudachiOutput<D>> {
    if !wakachi {
        return Box::new(output::Simple::new(print_all));
    }
    Box::new(output::Wakachi::default())
}

/// Builds the analysis pipeline selected by `args.split_sentences`.
///
/// * `Only` – sentence splitting without analysis ([`SplitSentencesOnly`]).
/// * `Default` – sentence splitting followed by analysis ([`AnalyzeSplitted`]).
/// * `None` – analysis without sentence splitting ([`AnalyzeNonSplitted`]).
///
/// Both analyzing variants receive a formatter created by `setup_output` from
/// `args.wakati` and `args.print_all`. The returned box borrows `dict` for `'a`.
fn setup_analyzer<'a>(args: &Cli, dict: &'a impl DictionaryAccess) -> Box<dyn Analysis + 'a> {
    let mode = args.mode;
    let debug = args.enable_debug;
    match args.split_sentences {
        SentenceSplitMode::None => {
            let formatter = setup_output(args.wakati, args.print_all);
            Box::new(AnalyzeNonSplitted::new(formatter, dict, mode, debug))
        }
        SentenceSplitMode::Default => {
            let formatter = setup_output(args.wakati, args.print_all);
            Box::new(AnalyzeSplitted::new(formatter, dict, mode, debug))
        }
        SentenceSplitMode::Only => Box::new(SplitSentencesOnly::new(dict)),
    }
}

fn main() {
Expand All @@ -156,7 +166,7 @@ fn main() {
// output: stdout or file
let inner_writer: Box<dyn Write> = match &args.output_file {
Some(output_path) => Box::new(
File::create(&output_path)
File::create(output_path)
.unwrap_or_else(|_| panic!("Failed to open output file {:?}", &output_path)),
),
None => Box::new(io::stdout()),
Expand All @@ -174,15 +184,7 @@ fn main() {
let dict = JapaneseDictionary::from_cfg(&config)
.unwrap_or_else(|e| panic!("Failed to create dictionary: {:?}", e));

let mut analyzer: Box<dyn Analysis> = match args.split_sentences {
SentenceSplitMode::Only => Box::new(SplitSentencesOnly::new(&dict)),
SentenceSplitMode::Default => with_output!(args, |o| {
AnalyzeSplitted::new(o, &dict, args.mode, args.enable_debug)
}),
SentenceSplitMode::None => with_output!(args, |o| {
AnalyzeNonSplitted::new(o, &dict, args.mode, args.enable_debug)
}),
};
let mut analyzer: Box<dyn Analysis> = setup_analyzer(&args, &dict);

let mut data = String::with_capacity(4 * 1024);
let is_stdout = args.output_file.is_none();
Expand All @@ -207,17 +209,18 @@ fn strip_eol(data: &str) -> &str {
let mut bytes = data.as_bytes();
let mut len = bytes.len();
if len > 1 && bytes[len - 1] == b'\n' {
len = len - 1;
len -= 1;
bytes = &bytes[..len];
if len > 1 && bytes[len - 1] == b'\r' {
len = len - 1;
len -= 1;
bytes = &bytes[..len];
}
}

// Safety: str was correct and we only removed full characters
unsafe { std::str::from_utf8_unchecked(bytes) }
}

#[cfg(test)]
mod tests {
use clap::CommandFactory;
Expand Down
4 changes: 2 additions & 2 deletions sudachi-cli/src/output.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,7 +44,7 @@ impl Wakachi {

impl<T: DictionaryAccess> SudachiOutput<T> for Wakachi {
fn write(&self, writer: &mut Writer, morphemes: &MorphemeList<T>) -> SudachiResult<()> {
if morphemes.len() == 0 {
if morphemes.is_empty() {
writer.write_all(b"\n")?;
return Ok(());
}
Expand Down
18 changes: 8 additions & 10 deletions sudachi/src/analysis/created.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -40,7 +40,7 @@ impl CreatedWords {
const MAX_SHIFT: Carrier = CreatedWords::MAX_VALUE - 1;

pub fn empty() -> CreatedWords {
return Default::default();
Default::default()
}

pub fn single<Pos: Into<i64>>(length: Pos) -> CreatedWords {
Expand All @@ -55,7 +55,7 @@ impl CreatedWords {
#[must_use]
pub fn add_word<P: Into<i64>>(&self, length: P) -> CreatedWords {
let mask = CreatedWords::single(length);
return self.add(mask);
self.add(mask)
}

#[must_use]
Expand All @@ -67,21 +67,19 @@ impl CreatedWords {
let mask = CreatedWords::single(length);
if (self.0 & mask.0) == 0 {
HasWord::No
} else if length.into() >= CreatedWords::MAX_VALUE as _ {
HasWord::Maybe
} else {
if length.into() >= CreatedWords::MAX_VALUE as _ {
HasWord::Maybe
} else {
HasWord::Yes
}
HasWord::Yes
}
}

pub fn is_empty(&self) -> bool {
return self.0 == 0;
self.0 == 0
}

pub fn not_empty(&self) -> bool {
return !self.is_empty();
!self.is_empty()
}
}

Expand Down
20 changes: 4 additions & 16 deletions sudachi/src/analysis/lattice.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -70,6 +70,7 @@ impl VNode {
/// the size of vectors never shrink.
/// You must use the size parameter to check the current size and never
/// access vectors after the end.
#[derive(Default)]
pub struct Lattice {
ends: Vec<Vec<VNode>>,
ends_full: Vec<Vec<Node>>,
Expand All @@ -78,18 +79,6 @@ pub struct Lattice {
size: usize,
}

impl Default for Lattice {
fn default() -> Self {
Lattice {
ends: Vec::new(),
ends_full: Vec::new(),
indices: Vec::new(),
eos: None,
size: 0,
}
}
}

impl Lattice {
fn reset_vec<T>(data: &mut Vec<Vec<T>>, target: usize) {
for v in data.iter_mut() {
Expand Down Expand Up @@ -242,8 +231,7 @@ impl Lattice {
for boundary in (0..self.indices.len()).rev() {
let nodes = &self.ends_full[boundary];

for node_idx in 0..nodes.len() {
let r_node = &nodes[node_idx];
for r_node in nodes {
let (surface, pos) = if r_node.is_special_node() {
("(null)", PosData::Bos)
} else if r_node.is_oov() {
Expand Down Expand Up @@ -282,7 +270,7 @@ impl Lattice {
write!(out, " {}", connect_cost)?;
}

write!(out, "\n")?;
writeln!(out)?;

dump_idx += 1;
}
Expand Down
8 changes: 2 additions & 6 deletions sudachi/src/analysis/mlist.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -93,11 +93,7 @@ impl<T: DictionaryAccess> MorphemeList<T> {
match self.input.try_borrow_mut() {
Ok(mut i) => {
let mref = i.deref_mut();
analyzer.swap_result(
&mut mref.input,
&mut self.nodes.mut_data(),
&mut mref.subset,
);
analyzer.swap_result(&mut mref.input, self.nodes.mut_data(), &mut mref.subset);
Ok(())
}
Err(_) => Err(SudachiError::MorphemeListBorrowed),
Expand Down
Loading
Loading