From 3ba683233a9d1349b6f6d6a64a7ca4fe76698f6c Mon Sep 17 00:00:00 2001 From: Mingun Date: Wed, 12 Jun 2024 12:00:52 +0500 Subject: [PATCH] Fix missing trimming of all-space text events when trim_text_start = false and trim_text_end = true This is still not a complete fix, because we will generate an empty Event::Text although we should not do that, but it is hard to prevent generation of such an event. Moreover, it would be better to remove the ability of automatic trimming completely, because it does not work correctly anyway -- events should not be trimmed at the boundary of text / CDATA, or text / PI, or text / comment in some cases --- Changelog.md | 5 +++++ src/reader/mod.rs | 16 ++++++++++++---- src/reader/state.rs | 17 ++++------------- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/Changelog.md b/Changelog.md index 7b91ec59..db947d77 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,8 +14,13 @@ ### Bug Fixes +- [#755]: Fix incorrect missing of trimming all-space text events when + `trim_text_start = false` and `trim_text_end = true`. + ### Misc Changes +[#755]: https://github.com/tafia/quick-xml/pull/755 + ## 0.32.0 -- 2024-06-10 diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 56afe387..bf8d75cd 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -244,13 +244,21 @@ macro_rules! 
read_event_impl { } ReadTextResult::UpToMarkup(bytes) => { $self.state.state = ParseState::InsideMarkup; - // Return Text event with `bytes` content or Eof if bytes is empty - Ok($self.state.emit_text(bytes)) + // FIXME: Can produce an empty event if: + // - event contains only spaces + // - trim_text_start = false + // - trim_text_end = true + Ok(Event::Text($self.state.emit_text(bytes))) } ReadTextResult::UpToEof(bytes) => { $self.state.state = ParseState::Done; - // Return Text event with `bytes` content or Eof if bytes is empty - Ok($self.state.emit_text(bytes)) + // Trim bytes from end if required + let event = $self.state.emit_text(bytes); + if event.is_empty() { + Ok(Event::Eof) + } else { + Ok(Event::Text(event)) + } } ReadTextResult::Err(e) => Err(Error::Io(e.into())), } diff --git a/src/reader/state.rs b/src/reader/state.rs index 50cef66a..c70d53f7 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -52,15 +52,11 @@ pub(super) struct ReaderState { } impl ReaderState { - /// Trims end whitespaces from `bytes`, if required, and returns a [`Text`] - /// event or an [`Eof`] event, if text after trimming is empty. + /// Trims end whitespaces from `bytes`, if required, and returns a text event. /// /// # Parameters /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` - /// - /// [`Text`]: Event::Text - /// [`Eof`]: Event::Eof - pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> Event<'b> { + pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> BytesText<'b> { let mut content = bytes; if self.config.trim_text_end { @@ -68,15 +64,10 @@ impl ReaderState { let len = bytes .iter() .rposition(|&b| !is_whitespace(b)) - .map_or_else(|| bytes.len(), |p| p + 1); + .map_or(0, |p| p + 1); content = &bytes[..len]; } - - if content.is_empty() { - Event::Eof - } else { - Event::Text(BytesText::wrap(content, self.decoder())) - } + BytesText::wrap(content, self.decoder()) } /// reads `BytesElement` starting with a `!`,