From 3324138da5948dfbcb1765d4715b70fa41a90069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Dupr=C3=A9?= Date: Wed, 7 Feb 2024 19:16:50 +0100 Subject: [PATCH] Make `slice_str` similar to `truncate_str` --- src/ansi.rs | 90 --------------------------------- src/lib.rs | 6 +-- src/utils.rs | 139 ++++++++++++++++++++++++++++++++++----------------- 3 files changed, 95 insertions(+), 140 deletions(-) diff --git a/src/ansi.rs b/src/ansi.rs index eb7ccff0..3a3c96c3 100644 --- a/src/ansi.rs +++ b/src/ansi.rs @@ -4,8 +4,6 @@ use std::{ str::CharIndices, }; -use crate::utils::char_width; - #[derive(Debug, Clone, Copy)] enum State { Start, @@ -269,63 +267,8 @@ impl<'a> Iterator for AnsiCodeIterator<'a> { impl<'a> FusedIterator for AnsiCodeIterator<'a> {} -/// Slice a `&str` in terms of text width. This means that only the text -/// columns strictly between `start` and `stop` will be kept. -/// -/// If a multi-columns character overlaps with the end of the interval it will -/// not be included. In such a case, the result will be less than `end - start` -/// columns wide. -pub fn slice_ansi_str(s: &str, start: usize, end: usize) -> &str { - if end <= start { - return ""; - } - - let mut pos = 0; - let mut res_start = 0; - let mut res_end = 0; - - 'outer: for (sub, is_ansi) in AnsiCodeIterator::new(s) { - // As ansi symbols have a width of 0 we can safely early-interupt - // the outer for loop only if current pos strictly greater than - // `end`. - if pos > end { - break; - } - - if is_ansi { - if pos < start { - res_start += sub.len(); - res_end = res_start; - } else if pos <= end { - res_end += sub.len(); - } else { - break 'outer; - } - } else { - for c in sub.chars() { - let c_width = char_width(c); - - if pos < start { - res_start += c.len_utf8(); - res_end = res_start; - } else if pos + c_width <= end { - res_end += c.len_utf8(); - } else { - break 'outer; - } - - pos += char_width(c); - } - } - } - - &s[res_start..res_end] -} - #[cfg(test)] mod tests { - use crate::measure_text_width; - use super::*; use lazy_static::lazy_static; @@ -492,37 +435,4 @@ mod tests { assert_eq!(iter.rest_slice(), ""); assert_eq!(iter.next(), None); } - - #[test] - fn test_slice_ansi_str() { - // Note that 🐶 is two columns wide - let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!"; - assert_eq!(slice_ansi_str(test_str, 5, 5), ""); - assert_eq!(slice_ansi_str(test_str, 0, test_str.len()), test_str); - - if cfg!(feature = "unicode-width") { - assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m"); - assert_eq!(measure_text_width(test_str), 16); - assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m"); - assert_eq!(slice_ansi_str(test_str, 7, 21), "\x1b[1m🐶\x1b[0m world!"); - assert_eq!(slice_ansi_str(test_str, 8, 21), "\x1b[0m world!"); - assert_eq!(slice_ansi_str(test_str, 9, 21), "\x1b[0m world!"); - - assert_eq!( - slice_ansi_str(test_str, 4, 9), - "o\x1b[31m🐶\x1b[1m🐶\x1b[0m" - ); - } else { - assert_eq!(slice_ansi_str(test_str, 0, 5), "Hello\x1b[31m"); - assert_eq!(slice_ansi_str(test_str, 0, 6), "Hello\x1b[31m🐶\u{1b}[1m"); - - assert_eq!( - slice_ansi_str(test_str, 4, 9), - "o\x1b[31m🐶\x1b[1m🐶\x1b[0m w" - ); - } - } } diff --git a/src/lib.rs b/src/lib.rs index f57e2c80..a7fbb935 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,12 +82,12 @@ pub use crate::term::{ }; pub use crate::utils::{ colors_enabled, colors_enabled_stderr, measure_text_width, pad_str, pad_str_with, - set_colors_enabled, set_colors_enabled_stderr, style, truncate_str, Alignment, Attribute, - Color, Emoji, Style, StyledObject, + set_colors_enabled, set_colors_enabled_stderr, slice_str, style, truncate_str, Alignment, + Attribute, Color, Emoji, Style, StyledObject, }; #[cfg(feature = "ansi-parsing")] -pub use crate::ansi::{slice_ansi_str, strip_ansi_codes, AnsiCodeIterator}; +pub use crate::ansi::{strip_ansi_codes, AnsiCodeIterator}; mod common_term; mod kb; diff --git a/src/utils.rs b/src/utils.rs index 868c0b5c..81e0ceb3 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -724,7 +724,7 @@ fn str_width(s: &str) -> usize { } #[cfg(feature = "ansi-parsing")] -pub(crate) fn char_width(c: char) -> usize { +fn char_width(c: char) -> usize { #[cfg(feature = "unicode-width")] { use unicode_width::UnicodeWidthChar; @@ -737,66 +737,90 @@ pub(crate) fn char_width(c: char) -> usize { } } -/// Truncates a string to a certain number of characters. +/// Slice a `&str` in terms of text width. This means that only the text +/// columns strictly between `start` and `stop` will be kept. +/// +/// If a multi-columns character overlaps with the end of the interval it will +/// not be included. In such a case, the result will be less than `end - start` +/// columns wide. /// /// This ensures that escape codes are not screwed up in the process. -/// If the maximum length is hit the string will be truncated but -/// escapes code will still be honored. If truncation takes place -/// the tail string will be appended. -pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { +pub fn slice_str(s: &str, start: usize, end: usize) -> Cow<'_, str> { #[cfg(feature = "ansi-parsing")] { - use std::cmp::Ordering; - let mut iter = AnsiCodeIterator::new(s); - let mut length = 0; - let mut rv = None; - - while let Some(item) = iter.next() { - match item { - (s, false) => { - if rv.is_none() { - if str_width(s) + length > width - str_width(tail) { - let ts = iter.current_slice(); - - let mut s_byte = 0; - let mut s_width = 0; - let rest_width = width - str_width(tail) - length; - for c in s.chars() { - s_byte += c.len_utf8(); - s_width += char_width(c); - match s_width.cmp(&rest_width) { - Ordering::Equal => break, - Ordering::Greater => { - s_byte -= c.len_utf8(); - break; - } - Ordering::Less => continue, - } - } - - let idx = ts.len() - s.len() + s_byte; - let mut buf = ts[..idx].to_string(); - buf.push_str(tail); - rv = Some(buf); - } - length += str_width(s); - } + let mut pos = 0; + let mut slice_start = 0; + let mut slice_end = 0; + + // ANSI symbols outside of the slice + let mut front_ansi = String::new(); + let mut back_ansi = String::new(); + + for (sub, is_ansi) in AnsiCodeIterator::new(s) { + if is_ansi { + if pos < start { + front_ansi.push_str(sub); + slice_start += sub.len(); + slice_end = slice_start; + } else if pos <= end { + slice_end += sub.len(); + } else { + back_ansi.push_str(sub); } - (s, true) => { - if let Some(ref mut rv) = rv { - rv.push_str(s); + } else { + for c in sub.chars() { + let c_width = char_width(c); + + if pos < start { + slice_start += c.len_utf8(); + slice_end = slice_start; + } else if pos + c_width <= end { + slice_end += c.len_utf8(); } + + pos += char_width(c); } } } - if let Some(buf) = rv { - Cow::Owned(buf) + let slice = &s[slice_start..slice_end]; + + if front_ansi.is_empty() && back_ansi.is_empty() { + Cow::Borrowed(slice) } else { - Cow::Borrowed(s) + Cow::Owned(front_ansi + slice + &back_ansi) } } + #[cfg(not(feature = "ansi-parsing"))] + { + Cow::Borrowed(s.get(start..end).unwrap_or_default()) + } +} +/// Truncates a string to a certain number of characters. +/// +/// This ensures that escape codes are not screwed up in the process. +/// If the maximum length is hit the string will be truncated but +/// escapes code will still be honored. If truncation takes place +/// the tail string will be appended. +pub fn truncate_str<'a>(s: &'a str, width: usize, tail: &str) -> Cow<'a, str> { + #[cfg(feature = "ansi-parsing")] + { + let s_width = measure_text_width(s); + + if s_width <= width { + return Cow::Borrowed(s); + } + + let tail_width = measure_text_width(tail); + let mut sliced = slice_str(s, 0, width.saturating_sub(tail_width)); + + if !tail.is_empty() { + sliced.to_mut().push_str(tail); + } + + sliced + } #[cfg(not(feature = "ansi-parsing"))] { if s.len() <= width - tail.len() { @@ -919,6 +943,27 @@ fn test_truncate_str() { ); } +#[test] +fn test_slice_ansi_str() { + // Note that 🐶 is two columns wide + let test_str = "Hello\x1b[31m🐶\x1b[1m🐶\x1b[0m world!"; + assert_eq!(slice_str(test_str, 0, test_str.len()), test_str); + + if cfg!(feature = "unicode-width") && cfg!(feature = "ansi-parsing") { + assert_eq!(slice_str(test_str, 5, 5), "\u{1b}[31m\u{1b}[1m\u{1b}[0m"); + assert_eq!(measure_text_width(test_str), 16); + assert_eq!(slice_str(test_str, 0, 5), "Hello\x1b[31m\x1b[1m\x1b[0m"); + assert_eq!(slice_str(test_str, 0, 6), "Hello\x1b[31m\x1b[1m\x1b[0m"); + assert_eq!(slice_str(test_str, 0, 7), "Hello\x1b[31m🐶\x1b[1m\x1b[0m"); + assert_eq!(slice_str(test_str, 4, 9), "o\x1b[31m🐶\x1b[1m🐶\x1b[0m"); + + assert_eq!( + slice_str(test_str, 7, 21), + "\x1b[31m\x1b[1m🐶\x1b[0m world!" + ); + } +} + #[test] fn test_truncate_str_no_ansi() { assert_eq!(&truncate_str("foo bar", 5, ""), "foo b");