diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 59c20a7b..df55b12b 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -40,6 +40,10 @@ jobs:
env:
LLVM_PROFILE_FILE: coverage/serialize-escape-html-%p-%m.profraw
run: cargo test --features serialize,escape-html
+ - name: Run tests (all features)
+ env:
+ LLVM_PROFILE_FILE: coverage/all-features-%p-%m.profraw
+ run: cargo test --all-features
- name: Prepare coverage information for upload
if: runner.os == 'Linux'
run: |
diff --git a/Cargo.toml b/Cargo.toml
index 1113a795..0da6e32e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,6 +42,52 @@ default = []
## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding
encoding = ["encoding_rs"]
+## This feature enables support for deserializing lists where tags are overlapped
+## with tags that do not correspond to the list.
+##
+## When this feature is enabled, the XML:
+## ```xml
+##
+##
+##
+##
+##
+##
+## ```
+## could be deserialized to a struct:
+## ```ignore
+## #[derive(Deserialize)]
+## #[serde(rename_all = "kebab-case")]
+## struct AnyName {
+## item: Vec<()>,
+## another_item: (),
+## }
+## ```
+##
+## When this feature is not enabled (default), only the first element will be
+## associated with the field, and the deserialized type will report an error
+## (duplicated field) when the deserializer encounters a second ` `.
+##
+## Note, that enabling this feature can lead to high and even unlimited memory
+## consumption, because deserializer should check all events up to the end of a
+## container tag (`` in that example) to figure out that there are no
+## more items for a field. If `` or even EOF is not encountered, the
+## parsing will never end which can lead to a denial-of-service (DoS) scenario.
+##
+## Having several lists and overlapped elements for them in XML could also lead
+## to quadratic parsing time, because the deserializer must check the list of
+## events as many times as the number of sequence fields present in the schema.
+##
+## To reduce negative consequences, always [limit] the maximum number of events
+## that [`Deserializer`] will buffer.
+##
+## This feature works only with `serialize` feature and has no effect if `serialize`
+## is not enabled.
+##
+## [limit]: crate::de::Deserializer::event_buffer_size
+## [`Deserializer`]: crate::de::Deserializer
+overlapped-lists = []
+
## Enables support for [`serde`] serialization and deserialization
serialize = ["serde"]
diff --git a/Changelog.md b/Changelog.md
index 0c8a0499..f55c3f5c 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -10,10 +10,19 @@
## Unreleased
+### New Features
+
+- [#387]: Allow overlapping between elements of sequence and other elements
+ (using new feature `overlapped-lists`)
+
### Bug Fixes
- [#9]: Deserialization erroneously was successful in some cases where error is expected.
This broke deserialization of untagged enums which rely on error if variant cannot be parsed
+- [#387]: Allow to have an ordinary elements together with a `$value` field
+- [#387]: Internal deserializer state can be broken when deserializing a map with
+ a sequence field (such as `Vec`), where elements of this sequence contains
+ another sequence. This error affects only users with the `serialize` feature enabled
### Misc Changes
@@ -36,9 +45,11 @@
### New Tests
- [#9]: Added tests for incorrect nested tags in input
+- [#387]: Added a bunch of tests for sequences deserialization
[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
+[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
## 0.23.0 -- 2022-05-08
diff --git a/src/de/map.rs b/src/de/map.rs
index 38346027..dfe24676 100644
--- a/src/de/map.rs
+++ b/src/de/map.rs
@@ -2,13 +2,14 @@
use crate::{
de::escape::EscapedDeserializer,
+ de::seq::{not_in, TagFilter},
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::attributes::IterState,
events::{BytesCData, BytesStart},
reader::Decoder,
};
-use serde::de::{self, DeserializeSeed, IntoDeserializer, Visitor};
+use serde::de::{self, DeserializeSeed, IntoDeserializer, SeqAccess, Visitor};
use serde::serde_if_integer128;
use std::borrow::Cow;
use std::ops::Range;
@@ -43,12 +44,14 @@ enum ValueSource {
///
/// ```
Text,
- /// Next value should be deserialized from an element with an any name.
- /// Corresponding tag name will always be associated with a field with name
- /// [`INNER_VALUE`].
+ /// Next value should be deserialized from an element with an any name, except
+ /// elements with a name matching one of the struct fields. Corresponding tag
+ /// name will always be associated with a field with name [`INNER_VALUE`].
///
- /// That state is set when call to [`peek()`] returns a [`Start`] event
- /// _and_ struct has a field with a special name [`INNER_VALUE`].
+ /// That state is set when call to [`peek()`] returns a [`Start`] event, which
+ /// [`name()`] is not listed in the [list of known fields] (which for a struct
+ /// is a list of field names, and for a map that is an empty list), _and_
+ /// struct has a field with a special name [`INNER_VALUE`].
///
/// When in this state, next event, returned by [`next()`], will be a [`Start`],
/// which represents both a key, and a value. Value would be deserialized from
@@ -97,9 +100,14 @@ enum ValueSource {
/// [`Start`]: DeEvent::Start
/// [`peek()`]: Deserializer::peek()
/// [`next()`]: Deserializer::next()
+ /// [`name()`]: BytesStart::name()
/// [`Text`]: Self::Text
+ /// [list of known fields]: MapAccess::fields
Content,
/// Next value should be deserialized from an element with a dedicated name.
+ /// If deserialized type is a sequence, then that sequence will collect all
+ /// elements with the same name until it will be filled. If not all elements
+ /// would be consumed, the rest will be ignored.
///
/// That state is set when call to [`peek()`] returns a [`Start`] event, which
/// [`name()`] represents a field name. That name will be deserialized as a key.
@@ -135,7 +143,24 @@ enum ValueSource {
Nested,
}
-/// A deserializer for `Attributes`
+/// A deserializer that extracts map-like structures from an XML. This deserializer
+/// represents a one XML tag:
+///
+/// ```xml
+/// ...
+/// ```
+///
+/// Name of this tag is stored in a [`Self::start`] property.
+///
+/// # Lifetimes
+///
+/// - `'de` lifetime represents a buffer, from which deserialized values can
+/// borrow their data. Depending on the underlying reader, there can be an
+/// internal buffer of deserializer (i.e. deserializer itself) or an input
+/// (in that case it is possible to approach zero-copy deserialization).
+///
+/// - `'a` lifetime represents a parent deserializer, which could own the data
+/// buffer.
pub(crate) struct MapAccess<'de, 'a, R>
where
R: XmlRead<'de>,
@@ -149,6 +174,16 @@ where
/// Current state of the accessor that determines what next call to API
/// methods should return.
source: ValueSource,
+ /// List of field names of the struct. It is empty for maps
+ fields: &'static [&'static str],
+ /// If `true`, then the deserialized struct has a field with a special name:
+ /// [`INNER_VALUE`]. That field should be deserialized from the text content
+ /// of an XML node:
+ ///
+ /// ```xml
+ /// value for INNER_VALUE field
+ /// ```
+ has_value_field: bool,
/// list of fields yet to unflatten (defined as starting with $unflatten=)
unflatten_fields: Vec<&'static [u8]>,
}
@@ -161,13 +196,15 @@ where
pub fn new(
de: &'a mut Deserializer<'de, R>,
start: BytesStart<'de>,
- fields: &[&'static str],
+ fields: &'static [&'static str],
) -> Result {
Ok(MapAccess {
de,
start,
iter: IterState::new(0, false),
source: ValueSource::Unknown,
+ fields,
+ has_value_field: fields.contains(&INNER_VALUE),
unflatten_fields: fields
.iter()
.filter(|f| f.starts_with(UNFLATTEN_PREFIX))
@@ -192,7 +229,6 @@ where
// FIXME: There error positions counted from end of tag name - need global position
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();
- let has_value_field = self.de.has_value_field;
if let Some(a) = self.iter.next(slice).transpose()? {
// try getting map from attributes (key= "value")
@@ -230,7 +266,7 @@ where
// }
// TODO: This should be handled by #[serde(flatten)]
// See https://github.com/serde-rs/serde/issues/1905
- DeEvent::Start(_) if has_value_field => {
+ DeEvent::Start(e) if self.has_value_field && not_in(self.fields, e, decoder)? => {
self.source = ValueSource::Content;
seed.deserialize(INNER_VALUE.into_deserializer()).map(Some)
}
@@ -287,7 +323,10 @@ where
// is implicit and equals to the `INNER_VALUE` constant, and the value
// is a `Text` or a `CData` event (the value deserializer will see one
// of that events)
- ValueSource::Text => seed.deserialize(MapValueDeserializer { map: self }),
+ ValueSource::Text => seed.deserialize(MapValueDeserializer {
+ map: self,
+ allow_start: false,
+ }),
// This arm processes the following XML shape:
//
// ...
@@ -295,7 +334,10 @@ where
// The whole map represented by an `` element, the map key
// is implicit and equals to the `INNER_VALUE` constant, and the value
// is a `Start` event (the value deserializer will see that event)
- ValueSource::Content => seed.deserialize(MapValueDeserializer { map: self }),
+ ValueSource::Content => seed.deserialize(MapValueDeserializer {
+ map: self,
+ allow_start: false,
+ }),
// This arm processes the following XML shape:
//
// ...
@@ -303,7 +345,10 @@ where
// The whole map represented by an `` element, the map key
// is a `tag`, and the value is a `Start` event (the value deserializer
// will see that event)
- ValueSource::Nested => seed.deserialize(&mut *self.de),
+ ValueSource::Nested => seed.deserialize(MapValueDeserializer {
+ map: self,
+ allow_start: true,
+ }),
ValueSource::Unknown => Err(DeError::KeyNotRead),
}
}
@@ -339,6 +384,82 @@ where
/// Access to the map that created this deserializer. Gives access to the
/// context, such as list of fields, that current map known about.
map: &'m mut MapAccess<'de, 'a, R>,
+ /// Determines, should [`Deserializer::next_text_impl()`] expand the second
+ /// level of tags or not.
+ ///
+ /// If this field is `true`, we process the following XML shape:
+ ///
+ /// ```xml
+ ///
+ /// ...
+ ///
+ /// ```
+ ///
+ /// The whole map represented by an `` element, the map key is a `tag`,
+ /// and the value starts with is a `Start("tag")` (the value deserializer will
+ /// see that event first) and extended to the matching `End("tag")` event.
+ /// In order to deserialize primitives (such as `usize`) we need to allow to
+ /// look inside the one levels of tags, so the
+ ///
+ /// ```xml
+ /// 42
+ /// ```
+ ///
+ /// could be deserialized into `42usize` without problems, and at the same time
+ ///
+ /// ```xml
+ ///
+ ///
+ ///
+ ///
+ ///
+ /// ```
+ /// could be deserialized to a struct.
+ ///
+ /// If this field is `false`, we processes the one of following XML shapes:
+ ///
+ /// ```xml
+ ///
+ /// text value
+ ///
+ /// ```
+ /// ```xml
+ ///
+ ///
+ ///
+ /// ```
+ /// ```xml
+ ///
+ /// ...
+ ///
+ /// ```
+ ///
+ /// The whole map represented by an `` element, the map key is
+ /// implicit and equals to the [`INNER_VALUE`] constant, and the value is
+ /// a [`Text`], a [`CData`], or a [`Start`] event (the value deserializer
+ /// will see one of those events). In the first two cases the value of this
+ /// field do not matter (because we already see the textual event and there
+ /// no reasons to look "inside" something), but in the last case the primitives
+ /// should raise a deserialization error, because that means that you trying
+ /// to deserialize the following struct:
+ ///
+ /// ```ignore
+ /// struct AnyName {
+ /// #[serde(rename = "$value")]
+ /// any_name: String,
+ /// }
+ /// ```
+ /// which means that `any_name` should get a content of the `` element.
+ ///
+ /// Changing this can be valuable for ,
+ /// but those fields should be explicitly marked that they want to get any
+ /// possible markup as a `String` and that mark is different from marking them
+ /// as accepting "text content" which the currently `$value` means.
+ ///
+ /// [`Text`]: DeEvent::Text
+ /// [`CData`]: DeEvent::CData
+ /// [`Start`]: DeEvent::Start
+ allow_start: bool,
}
impl<'de, 'a, 'm, R> MapValueDeserializer<'de, 'a, 'm, R>
@@ -348,7 +469,7 @@ where
/// Returns a text event, used inside [`deserialize_primitives!()`]
#[inline]
fn next_text(&mut self, unescape: bool) -> Result, DeError> {
- self.map.de.next_text_impl(unescape, false)
+ self.map.de.next_text_impl(unescape, self.allow_start)
}
/// Returns a decoder, used inside [`deserialize_primitives!()`]
@@ -371,10 +492,6 @@ where
forward!(deserialize_unit_struct(name: &'static str));
forward!(deserialize_newtype_struct(name: &'static str));
- forward!(deserialize_seq);
- forward!(deserialize_tuple(len: usize));
- forward!(deserialize_tuple_struct(name: &'static str, len: usize));
-
forward!(deserialize_map);
forward!(deserialize_struct(
name: &'static str,
@@ -389,8 +506,114 @@ where
forward!(deserialize_any);
forward!(deserialize_ignored_any);
+ /// Tuple representation is the same as [sequences](#method.deserialize_seq).
+ fn deserialize_tuple(self, _len: usize, visitor: V) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_seq(visitor)
+ }
+
+ /// Named tuple representation is the same as [unnamed tuples](#method.deserialize_tuple).
+ fn deserialize_tuple_struct(
+ self,
+ _name: &'static str,
+ len: usize,
+ visitor: V,
+ ) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ self.deserialize_tuple(len, visitor)
+ }
+
+ fn deserialize_seq(self, visitor: V) -> Result
+ where
+ V: Visitor<'de>,
+ {
+ let filter = if self.allow_start {
+ match self.map.de.peek()? {
+ // Clone is cheap if event borrows from the input
+ DeEvent::Start(e) => TagFilter::Include(e.clone()),
+ // SAFETY: we use that deserializer with `allow_start == true`
+ // only from the `MapAccess::next_value_seed` and only when we
+ // peeked `Start` event
+ _ => unreachable!(),
+ }
+ } else {
+ TagFilter::Exclude(self.map.fields)
+ };
+ let seq = visitor.visit_seq(MapValueSeqAccess {
+ map: self.map,
+ filter,
+ });
+ #[cfg(feature = "overlapped-lists")]
+ self.map.de.start_replay();
+ seq
+ }
+
#[inline]
fn is_human_readable(&self) -> bool {
self.map.de.is_human_readable()
}
}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// An accessor to sequence elements forming a value for struct field.
+/// Technically, this sequence is flattened out into structure and sequence
+/// elements are overlapped with other fields of a structure
+struct MapValueSeqAccess<'de, 'a, 'm, R>
+where
+ R: XmlRead<'de>,
+{
+ /// Accessor to a map that creates this accessor and to a deserializer for
+ /// a sequence items.
+ map: &'m mut MapAccess<'de, 'a, R>,
+ /// Filter that determines whether a tag is a part of this sequence.
+ ///
+ /// When feature `overlapped-lists` is not activated, iteration will stop
+ /// when found a tag that does not pass this filter.
+ ///
+ /// When feature `overlapped-lists` is activated, all tags, that not pass
+ /// this check, will be skipped.
+ filter: TagFilter<'de>,
+}
+
+impl<'de, 'a, 'm, R> SeqAccess<'de> for MapValueSeqAccess<'de, 'a, 'm, R>
+where
+ R: XmlRead<'de>,
+{
+ type Error = DeError;
+
+ fn next_element_seed(&mut self, seed: T) -> Result, DeError>
+ where
+ T: DeserializeSeed<'de>,
+ {
+ let decoder = self.map.de.reader.decoder();
+ loop {
+ break match self.map.de.peek()? {
+ // If we see a tag that we not interested, skip it
+ #[cfg(feature = "overlapped-lists")]
+ DeEvent::Start(e) if !self.filter.is_suitable(&e, decoder)? => {
+ self.map.de.skip()?;
+ continue;
+ }
+ // Stop iteration when list elements ends
+ #[cfg(not(feature = "overlapped-lists"))]
+ DeEvent::Start(e) if !self.filter.is_suitable(&e, decoder)? => Ok(None),
+
+ // Stop iteration after reaching a closing tag
+ DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None),
+ // This is a unmatched closing tag, so the XML is invalid
+ DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
+ // We cannot get `Eof` legally, because we always inside of the
+ // opened tag `self.map.start`
+ DeEvent::Eof => Err(DeError::UnexpectedEof),
+
+ // Start(tag), Text, CData
+ _ => seed.deserialize(&mut *self.map.de).map(Some),
+ };
+ }
+ }
+}
diff --git a/src/de/mod.rs b/src/de/mod.rs
index 424155eb..71ba50c7 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -226,7 +226,11 @@ use crate::{
};
use serde::de::{self, Deserialize, DeserializeOwned, Visitor};
use std::borrow::Cow;
+#[cfg(feature = "overlapped-lists")]
+use std::collections::VecDeque;
use std::io::BufRead;
+#[cfg(feature = "overlapped-lists")]
+use std::num::NonZeroUsize;
pub(crate) const INNER_VALUE: &str = "$value";
pub(crate) const UNFLATTEN_PREFIX: &str = "$unflatten=";
@@ -248,21 +252,42 @@ pub enum DeEvent<'a> {
Eof,
}
-/// An xml deserializer
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// A structure that deserializes XML into Rust values.
pub struct Deserializer<'de, R>
where
R: XmlRead<'de>,
{
+ /// An XML reader that streams events into this deserializer
reader: R,
- peek: Option>,
- /// Special sing that deserialized struct have a field with the special
- /// name (see constant `INNER_VALUE`). That field should be deserialized
- /// from the text content of the XML node:
+
+ /// When deserializing sequences sometimes we have to skip unwanted events.
+ /// That events should be stored and then replayed. This is a replay buffer,
+ /// that streams events while not empty. When it exhausted, events will
+ /// requested from [`Self::reader`].
+ #[cfg(feature = "overlapped-lists")]
+ read: VecDeque>,
+ /// When deserializing sequences sometimes we have to skip events, because XML
+ /// is tolerant to elements order and even if in the XSD order is strictly
+ /// specified (using `xs:sequence`) most of XML parsers allows order violations.
+ /// That means, that elements, forming a sequence, could be overlapped with
+ /// other elements, do not related to that sequence.
///
- /// ```xml
- /// value for INNER_VALUE field
- /// ```
- has_value_field: bool,
+ /// In order to support this, deserializer will scan events and skip unwanted
+ /// events, store them here. After call [`Self::start_replay()`] all events
+ /// moved from this to [`Self::read`].
+ #[cfg(feature = "overlapped-lists")]
+ write: VecDeque>,
+ /// Maximum number of events that can be skipped when processing sequences
+ /// that occur out-of-order. This field is used to prevent potential
+ /// denial-of-service (DoS) attacks which could cause infinite memory
+ /// consumption when parsing a very large amount of XML into a sequence field.
+ #[cfg(feature = "overlapped-lists")]
+ limit: Option,
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ peek: Option>,
}
/// Deserialize an instance of type `T` from a string of XML text.
@@ -353,8 +378,16 @@ where
pub fn new(reader: R) -> Self {
Deserializer {
reader,
+
+ #[cfg(feature = "overlapped-lists")]
+ read: VecDeque::new(),
+ #[cfg(feature = "overlapped-lists")]
+ write: VecDeque::new(),
+ #[cfg(feature = "overlapped-lists")]
+ limit: None,
+
+ #[cfg(not(feature = "overlapped-lists"))]
peek: None,
- has_value_field: false,
}
}
@@ -364,6 +397,85 @@ where
Self::new(reader)
}
+ /// Set the maximum number of events that could be skipped during deserialization
+ /// of sequences.
+ ///
+ /// If `` contains more than specified nested elements, `#text` or
+ /// CDATA nodes, then [`DeError::TooManyEvents`] will be returned during
+ /// deserialization of sequence field (any type that uses [`deserialize_seq`]
+ /// for the deserialization, for example, `Vec`).
+ ///
+ /// This method can be used to prevent a [DoS] attack and infinite memory
+ /// consumption when parsing a very large XML to a sequence field.
+ ///
+ /// It is strongly recommended to set limit to some value when you parse data
+ /// from untrusted sources. You should choose a value that your typical XMLs
+ /// can have _between_ different elements that corresponds to the same sequence.
+ ///
+ /// # Examples
+ ///
+ /// Let's imagine, that we deserialize such structure:
+ /// ```
+ /// struct List {
+ /// item: Vec<()>,
+ /// }
+ /// ```
+ ///
+ /// The XML that we try to parse look like this:
+ /// ```xml
+ ///
+ ///
+ ///
+ ///
+ /// with text
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ ///
+ /// ```
+ ///
+ /// There, when we deserialize the `item` field, we need to buffer 7 events,
+ /// before we can deserialize the second ` `:
+ ///
+ /// - ``
+ /// - ``
+ /// - `#text(with text)`
+ /// - ` `
+ /// - ` ` (virtual start event)
+ /// - ` ` (vitrual end event)
+ /// - ` `
+ ///
+ /// Note, that ` ` internally represented as 2 events:
+ /// one for the start tag and one for the end tag. In the future this can be
+ /// eliminated, but for now we use [auto-expanding feature] of a reader,
+ /// because this simplifies deserializer code.
+ ///
+ /// [`deserialize_seq`]: serde::Deserializer::deserialize_seq
+ /// [DoS]: https://en.wikipedia.org/wiki/Denial-of-service_attack
+ /// [auto-expanding feature]: Reader::expand_empty_elements
+ #[cfg(feature = "overlapped-lists")]
+ pub fn event_buffer_size(&mut self, limit: Option) -> &mut Self {
+ self.limit = limit;
+ self
+ }
+
+ #[cfg(feature = "overlapped-lists")]
+ fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
+ if self.read.is_empty() {
+ self.read.push_front(self.reader.next()?);
+ }
+ if let Some(event) = self.read.front() {
+ return Ok(&event);
+ }
+ // SAFETY: `self.read` was filled in the code above.
+ // NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }`
+ // if unsafe code will be allowed
+ unreachable!()
+ }
+ #[cfg(not(feature = "overlapped-lists"))]
fn peek(&mut self) -> Result<&DeEvent<'de>, DeError> {
if self.peek.is_none() {
self.peek = Some(self.reader.next()?);
@@ -379,12 +491,81 @@ where
}
fn next(&mut self) -> Result, DeError> {
+ // Replay skipped or peeked events
+ #[cfg(feature = "overlapped-lists")]
+ if let Some(event) = self.read.pop_front() {
+ return Ok(event);
+ }
+ #[cfg(not(feature = "overlapped-lists"))]
if let Some(e) = self.peek.take() {
return Ok(e);
}
self.reader.next()
}
+ /// Extracts XML tree of events from and stores them in the skipped events
+ /// buffer from which they can be retrieved later. You MUST call
+ /// [`Self::start_replay()`] after calling this to give access to the skipped
+ /// events and release internal buffers.
+ #[cfg(feature = "overlapped-lists")]
+ fn skip(&mut self) -> Result<(), DeError> {
+ let event = self.next()?;
+ self.skip_event(event)?;
+ match self.write.back() {
+ // Skip all subtree, if we skip a start event
+ Some(DeEvent::Start(e)) => {
+ let end = e.name().to_owned();
+ let mut depth = 0;
+ loop {
+ let event = self.next()?;
+ match event {
+ DeEvent::Start(ref e) if e.name() == end => {
+ self.skip_event(event)?;
+ depth += 1;
+ }
+ DeEvent::End(ref e) if e.name() == end => {
+ self.skip_event(event)?;
+ if depth == 0 {
+ return Ok(());
+ }
+ depth -= 1;
+ }
+ _ => self.skip_event(event)?,
+ }
+ }
+ }
+ _ => Ok(()),
+ }
+ }
+
+ #[cfg(feature = "overlapped-lists")]
+ #[inline]
+ fn skip_event(&mut self, event: DeEvent<'de>) -> Result<(), DeError> {
+ if let Some(max) = self.limit {
+ if self.write.len() >= max.get() {
+ return Err(DeError::TooManyEvents(max));
+ }
+ }
+ self.write.push_back(event);
+ Ok(())
+ }
+
+ /// Moves all buffered events to the end of [`Self::write`] buffer and swaps
+ /// read and write buffers.
+ ///
+ /// After calling this method, [`Self::peek()`] and [`Self::next()`] starts
+ /// return events that was skipped previously by calling [`Self::skip()`],
+ /// and only when all that events will be consumed, the deserializer starts
+ /// to drain events from underlying reader.
+ ///
+ /// This method MUST be called if any number of [`Self::skip()`] was called
+ /// after [`Self::new()`] or `start_replay()` or you'll lost events.
+ #[cfg(feature = "overlapped-lists")]
+ fn start_replay(&mut self) {
+ self.write.append(&mut self.read);
+ std::mem::swap(&mut self.read, &mut self.write);
+ }
+
fn next_start(&mut self) -> Result>, DeError> {
loop {
let e = self.next()?;
@@ -474,6 +655,33 @@ where
self.reader.decoder()
}
+ /// Drops all events until event with [name](BytesEnd::name()) `name` won't be
+ /// dropped. This method should be called after [`Self::next()`]
+ #[cfg(feature = "overlapped-lists")]
+ fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
+ let mut depth = 0;
+ loop {
+ match self.read.pop_front() {
+ Some(DeEvent::Start(e)) if e.name() == name => {
+ depth += 1;
+ }
+ Some(DeEvent::End(e)) if e.name() == name => {
+ if depth == 0 {
+ return Ok(());
+ }
+ depth -= 1;
+ }
+
+ // Drop all other skipped events
+ Some(_) => continue,
+
+ // If we do not have skipped events, use effective reading that will
+ // not allocate memory for events
+ None => return self.reader.read_to_end(name),
+ }
+ }
+ }
+ #[cfg(not(feature = "overlapped-lists"))]
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
// First one might be in self.peek
match self.next()? {
@@ -544,10 +752,8 @@ where
// Try to go to the next `... ` or ` `
if let Some(e) = self.next_start()? {
let name = e.name().to_vec();
- self.has_value_field = fields.contains(&INNER_VALUE);
let map = map::MapAccess::new(self, e, fields)?;
let value = visitor.visit_map(map)?;
- self.has_value_field = false;
self.read_to_end(&name)?;
Ok(value)
} else {
@@ -649,7 +855,10 @@ where
where
V: Visitor<'de>,
{
- visitor.visit_seq(seq::SeqAccess::new(self)?)
+ let seq = visitor.visit_seq(seq::TopLevelSeqAccess::new(self)?);
+ #[cfg(feature = "overlapped-lists")]
+ self.start_replay();
+ seq
}
fn deserialize_map(self, visitor: V) -> Result
@@ -804,6 +1013,283 @@ mod tests {
use super::*;
use pretty_assertions::assert_eq;
+ #[cfg(feature = "overlapped-lists")]
+ mod skip {
+ use super::*;
+ use crate::de::DeEvent::*;
+ use crate::events::{BytesEnd, BytesText};
+ use pretty_assertions::assert_eq;
+
+ /// Checks that `peek()` and `read()` behaves correctly after `skip()`
+ #[test]
+ fn read_and_peek() {
+ let mut de = Deserializer::from_slice(
+ br#"
+
+
+ text
+
+
+
+
+
+ "#,
+ );
+
+ // Initial conditions - both are empty
+ assert_eq!(de.read, vec![]);
+ assert_eq!(de.write, vec![]);
+
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"root"))
+ );
+ assert_eq!(
+ de.peek().unwrap(),
+ &Start(BytesStart::borrowed_name(b"inner"))
+ );
+
+ // Should skip first tree
+ de.skip().unwrap();
+ assert_eq!(de.read, vec![]);
+ assert_eq!(
+ de.write,
+ vec![
+ Start(BytesStart::borrowed_name(b"inner")),
+ Text(BytesText::from_escaped_str("text")),
+ Start(BytesStart::borrowed_name(b"inner")),
+ End(BytesEnd::borrowed(b"inner")),
+ End(BytesEnd::borrowed(b"inner")),
+ ]
+ );
+
+ // Consume . Now unconsumed XML looks like:
+ //
+ //
+ // text
+ //
+ //
+ //
+ //
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"next"))
+ );
+ assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"next")));
+
+ // We finish writing. Next call to `next()` should start replay that messages:
+ //
+ //
+ // text
+ //
+ //
+ //
+ // and after that stream that messages:
+ //
+ //
+ //
+ de.start_replay();
+ assert_eq!(
+ de.read,
+ vec![
+ Start(BytesStart::borrowed_name(b"inner")),
+ Text(BytesText::from_escaped_str("text")),
+ Start(BytesStart::borrowed_name(b"inner")),
+ End(BytesEnd::borrowed(b"inner")),
+ End(BytesEnd::borrowed(b"inner")),
+ ]
+ );
+ assert_eq!(de.write, vec![]);
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"inner"))
+ );
+
+ // Skip `#text` node and consume after it
+ de.skip().unwrap();
+ assert_eq!(
+ de.read,
+ vec![
+ Start(BytesStart::borrowed_name(b"inner")),
+ End(BytesEnd::borrowed(b"inner")),
+ End(BytesEnd::borrowed(b"inner")),
+ ]
+ );
+ assert_eq!(
+ de.write,
+ vec![
+ // This comment here to keep the same formatting of both arrays
+ // otherwise rustfmt suggest one-line it
+ Text(BytesText::from_escaped_str("text")),
+ ]
+ );
+
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"inner"))
+ );
+ assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
+
+ // We finish writing. Next call to `next()` should start replay messages:
+ //
+ // text
+ //
+ //
+ // and after that stream that messages:
+ //
+ //
+ //
+ de.start_replay();
+ assert_eq!(
+ de.read,
+ vec![
+ Text(BytesText::from_escaped_str("text")),
+ End(BytesEnd::borrowed(b"inner")),
+ ]
+ );
+ assert_eq!(de.write, vec![]);
+ assert_eq!(
+ de.next().unwrap(),
+ Text(BytesText::from_escaped_str("text"))
+ );
+ assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"inner")));
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"target"))
+ );
+ assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"target")));
+ assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
+ }
+
+ /// Checks that `read_to_end()` behaves correctly after `skip()`
+ #[test]
+ fn read_to_end() {
+ let mut de = Deserializer::from_slice(
+ br#"
+
+
+ text
+
+
+
+
+
+
+ "#,
+ );
+
+ // Initial conditions - both are empty
+ assert_eq!(de.read, vec![]);
+ assert_eq!(de.write, vec![]);
+
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"root"))
+ );
+
+ // Skip the tree
+ de.skip().unwrap();
+ assert_eq!(de.read, vec![]);
+ assert_eq!(
+ de.write,
+ vec![
+ Start(BytesStart::borrowed_name(b"skip")),
+ Text(BytesText::from_escaped_str("text")),
+ Start(BytesStart::borrowed_name(b"skip")),
+ End(BytesEnd::borrowed(b"skip")),
+ End(BytesEnd::borrowed(b"skip")),
+ ]
+ );
+
+ // Drop all events thet represents tree. Now unconsumed XML looks like:
+ //
+ //
+ // text
+ //
+ //
+ //
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"target"))
+ );
+ de.read_to_end(b"target").unwrap();
+ assert_eq!(de.read, vec![]);
+ assert_eq!(
+ de.write,
+ vec![
+ Start(BytesStart::borrowed_name(b"skip")),
+ Text(BytesText::from_escaped_str("text")),
+ Start(BytesStart::borrowed_name(b"skip")),
+ End(BytesEnd::borrowed(b"skip")),
+ End(BytesEnd::borrowed(b"skip")),
+ ]
+ );
+
+ // We finish writing. Next call to `next()` should start replay that messages:
+ //
+ //
+ // text
+ //
+ //
+ //
+ // and after that stream that messages:
+ //
+ //
+ de.start_replay();
+ assert_eq!(
+ de.read,
+ vec![
+ Start(BytesStart::borrowed_name(b"skip")),
+ Text(BytesText::from_escaped_str("text")),
+ Start(BytesStart::borrowed_name(b"skip")),
+ End(BytesEnd::borrowed(b"skip")),
+ End(BytesEnd::borrowed(b"skip")),
+ ]
+ );
+ assert_eq!(de.write, vec![]);
+
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::borrowed_name(b"skip"))
+ );
+ de.read_to_end(b"skip").unwrap();
+
+ assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
+ }
+
+ /// Checks that limiting buffer size works correctly
+ #[test]
+ fn limit() {
+ use serde::Deserialize;
+
+ #[derive(Debug, Deserialize)]
+ #[allow(unused)]
+ struct List {
+ item: Vec<()>,
+ }
+
+ let mut de = Deserializer::from_slice(
+ br#"
+
+
+
+ with text
+
+
+
+
+
+ "#,
+ );
+ de.event_buffer_size(NonZeroUsize::new(3));
+
+ match List::deserialize(&mut de) {
+ Err(DeError::TooManyEvents(count)) => assert_eq!(count.get(), 3),
+ e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e),
+ }
+ }
+ }
+
#[test]
fn read_to_end() {
use crate::de::DeEvent::*;
diff --git a/src/de/seq.rs b/src/de/seq.rs
index daeac2de..a1a5b9de 100644
--- a/src/de/seq.rs
+++ b/src/de/seq.rs
@@ -1,51 +1,112 @@
use crate::de::{DeError, DeEvent, Deserializer, XmlRead};
use crate::events::BytesStart;
-use serde::de::{self, DeserializeSeed};
+use crate::reader::Decoder;
+use serde::de::{DeserializeSeed, SeqAccess};
+#[cfg(not(feature = "encoding"))]
+use std::borrow::Cow;
+/// Check if tag `start` is included in the `fields` list. `decoder` is used to
+/// get a string representation of a tag.
+///
+/// Returns `true`, if `start` is not in the `fields` list and `false` otherwise.
+pub fn not_in(
+ fields: &'static [&'static str],
+ start: &BytesStart,
+ decoder: Decoder,
+) -> Result {
+ #[cfg(not(feature = "encoding"))]
+ let tag = Cow::Borrowed(decoder.decode(start.name())?);
+
+ #[cfg(feature = "encoding")]
+ let tag = decoder.decode(start.name());
+
+ Ok(fields.iter().all(|&field| field != tag.as_ref()))
+}
+
+/// A filter that determines, what tags should form a sequence.
+///
+/// There are two types of sequences:
+/// - sequence where each element represented by tags with the same name
+/// - sequence where each element can have a different tag
+///
+/// The first variant could represent a collection of structs, the second --
+/// a collection of enum variants.
+///
+/// In the second case we don't know what tag name should be expected as a
+/// sequence element, so we accept any element. Since the sequence are flattened
+/// into maps, we skip elements which have dedicated fields in a struct by using an
+/// `Exclude` filter that filters out elements with names matching field names
+/// from the struct.
+///
+/// # Lifetimes
+///
+/// `'de` represents a lifetime of the XML input, when filter stores the
+/// dedicated tag name
#[derive(Debug)]
-enum Names {
- Unknown,
- Peek(Vec),
+pub enum TagFilter<'de> {
+ /// A `SeqAccess` interested only in tags with specified name to deserialize
+ /// an XML like this:
+ ///
+ /// ```xml
+ /// <...>
+ ///
+ ///
+ ///
+ /// ...
+ ///
+ /// ```
+ ///
+ /// The tag name is stored inside (`b"tag"` for that example)
+ Include(BytesStart<'de>), //TODO: Need to store only name instead of a whole tag
+ /// A `SeqAccess` interested in tags with any name, except explicitly listed.
+ /// Excluded tags are used as struct field names and therefore should not
+ /// fall into a `$value` category
+ Exclude(&'static [&'static str]),
}
-impl Names {
- fn is_valid(&self, start: &BytesStart) -> bool {
+impl<'de> TagFilter<'de> {
+ pub fn is_suitable(&self, start: &BytesStart, decoder: Decoder) -> Result {
match self {
- Names::Unknown => true,
- Names::Peek(n) => n == start.name(),
+ Self::Include(n) => Ok(n.name() == start.name()),
+ Self::Exclude(fields) => not_in(fields, start, decoder),
}
}
}
/// A SeqAccess
-pub struct SeqAccess<'de, 'a, R>
+pub struct TopLevelSeqAccess<'de, 'a, R>
where
R: XmlRead<'de>,
{
+ /// Deserializer used to deserialize sequence items
de: &'a mut Deserializer<'de, R>,
- names: Names,
+ /// Filter that determines whether a tag is a part of this sequence.
+ ///
+ /// When feature `overlapped-lists` is not activated, iteration will stop
+ /// when found a tag that does not pass this filter.
+ ///
+ /// When feature `overlapped-lists` is activated, all tags, that not pass
+ /// this check, will be skipped.
+ filter: TagFilter<'de>,
}
-impl<'a, 'de, R> SeqAccess<'de, 'a, R>
+impl<'a, 'de, R> TopLevelSeqAccess<'de, 'a, R>
where
R: XmlRead<'de>,
{
- /// Get a new SeqAccess
+ /// Creates a new accessor to a top-level sequence of XML elements.
pub fn new(de: &'a mut Deserializer<'de, R>) -> Result {
- let names = if de.has_value_field {
- Names::Unknown
+ let filter = if let DeEvent::Start(e) = de.peek()? {
+ // Clone is cheap if event borrows from the input
+ TagFilter::Include(e.clone())
} else {
- if let DeEvent::Start(e) = de.peek()? {
- Names::Peek(e.name().to_vec())
- } else {
- Names::Unknown
- }
+ TagFilter::Exclude(&[])
};
- Ok(SeqAccess { de, names })
+ Ok(Self { de, filter })
}
}
-impl<'de, 'a, R> de::SeqAccess<'de> for SeqAccess<'de, 'a, R>
+impl<'de, 'a, R> SeqAccess<'de> for TopLevelSeqAccess<'de, 'a, R>
where
R: XmlRead<'de>,
{
@@ -55,10 +116,39 @@ where
where
T: DeserializeSeed<'de>,
{
- match self.de.peek()? {
- DeEvent::Eof | DeEvent::End(_) => Ok(None),
- DeEvent::Start(e) if !self.names.is_valid(e) => Ok(None),
- _ => seed.deserialize(&mut *self.de).map(Some),
+ let decoder = self.de.reader.decoder();
+ loop {
+ break match self.de.peek()? {
+ // If we see a tag that we not interested, skip it
+ #[cfg(feature = "overlapped-lists")]
+ DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => {
+ self.de.skip()?;
+ continue;
+ }
+ // Stop iteration when list elements ends
+ #[cfg(not(feature = "overlapped-lists"))]
+ DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None),
+ DeEvent::End(_) => Ok(None),
+ DeEvent::Eof => Ok(None),
+
+ // Start(tag), Text, CData
+ _ => seed.deserialize(&mut *self.de).map(Some),
+ };
}
}
}
+
+#[test]
+fn test_not_in() {
+ let tag = BytesStart::borrowed_name(b"tag");
+
+ assert_eq!(not_in(&[], &tag, Decoder::utf8()).unwrap(), true);
+ assert_eq!(
+ not_in(&["no", "such", "tags"], &tag, Decoder::utf8()).unwrap(),
+ true
+ );
+ assert_eq!(
+ not_in(&["some", "tag", "included"], &tag, Decoder::utf8()).unwrap(),
+ false
+ );
+}
diff --git a/src/errors.rs b/src/errors.rs
index cc92c7d0..54a87205 100644
--- a/src/errors.rs
+++ b/src/errors.rs
@@ -116,6 +116,8 @@ pub mod serialize {
use super::*;
use crate::utils::write_byte_string;
use std::fmt;
+ #[cfg(feature = "overlapped-lists")]
+ use std::num::NonZeroUsize;
use std::num::{ParseFloatError, ParseIntError};
/// (De)serialization error
@@ -159,6 +161,10 @@ pub mod serialize {
ExpectedStart,
/// Unsupported operation
Unsupported(&'static str),
+ /// Too many events were skipped while deserializing a sequence, event limit
+ /// exceeded. The limit was provided as an argument
+ #[cfg(feature = "overlapped-lists")]
+ TooManyEvents(NonZeroUsize),
}
impl fmt::Display for DeError {
@@ -183,6 +189,8 @@ pub mod serialize {
DeError::UnexpectedEof => write!(f, "Unexpected `Event::Eof`"),
DeError::ExpectedStart => write!(f, "Expecting `Event::Start`"),
DeError::Unsupported(s) => write!(f, "Unsupported operation {}", s),
+ #[cfg(feature = "overlapped-lists")]
+ DeError::TooManyEvents(s) => write!(f, "Deserializer buffers {} events, limit exceeded", s),
}
}
}
diff --git a/src/reader.rs b/src/reader.rs
index 17b4f344..271c0741 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -1669,6 +1669,23 @@ impl Decoder {
}
}
+/// This implementation is required for tests of other parts of the library
+#[cfg(test)]
+#[cfg(feature = "serialize")]
+impl Decoder {
+ #[cfg(not(feature = "encoding"))]
+ pub(crate) fn utf8() -> Self {
+ Decoder
+ }
+
+ #[cfg(feature = "encoding")]
+ pub(crate) fn utf8() -> Self {
+ Decoder {
+ encoding: encoding_rs::UTF_8,
+ }
+ }
+}
+
#[cfg(test)]
mod test {
macro_rules! check {
diff --git a/tests/serde-de.rs b/tests/serde-de.rs
index 28f6dba5..1b100ff1 100644
--- a/tests/serde-de.rs
+++ b/tests/serde-de.rs
@@ -512,6 +512,2530 @@ mod tuple_struct {
}
}
+mod seq {
+ use super::*;
+
+ /// Check that top-level sequences can be deserialized from the multi-root XML documents
+ mod top_level {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn simple() {
+ from_str::<[(); 3]>("42 answer ").unwrap();
+
+ let data: Vec<()> = from_str("42 answer ").unwrap();
+ assert_eq!(data, vec![(), (), ()]);
+ }
+
+ /// Special case: empty sequence
+ #[test]
+ fn empty() {
+ from_str::<[(); 0]>("").unwrap();
+
+ let data: Vec<()> = from_str("").unwrap();
+ assert_eq!(data, vec![]);
+ }
+
+ /// Special case: one-element sequence
+ #[test]
+ fn one_element() {
+ from_str::<[(); 1]>(" ").unwrap();
+ from_str::<[(); 1]>("42 ").unwrap();
+ from_str::<[(); 1]>("text").unwrap();
+ from_str::<[(); 1]>("").unwrap();
+
+ let data: Vec<()> = from_str(" ").unwrap();
+ assert_eq!(data, vec![()]);
+
+ let data: Vec<()> = from_str("42 ").unwrap();
+ assert_eq!(data, vec![()]);
+
+ let data: Vec<()> = from_str("text").unwrap();
+ assert_eq!(data, vec![()]);
+
+ let data: Vec<()> = from_str("").unwrap();
+ assert_eq!(data, vec![()]);
+ }
+
+ #[test]
+ fn excess_attribute() {
+ from_str::<[(); 3]>(r#"42 answer "#)
+ .unwrap();
+
+ let data: Vec<()> =
+ from_str(r#"42 answer "#)
+ .unwrap();
+ assert_eq!(data, vec![(), (), ()]);
+ }
+
+ #[test]
+ fn mixed_content() {
+ from_str::<[(); 3]>(
+ r#"
+
+ text
+
+ "#,
+ )
+ .unwrap();
+
+ let data: Vec<()> = from_str(
+ r#"
+
+ text
+
+ "#,
+ )
+ .unwrap();
+ assert_eq!(data, vec![(), (), ()]);
+ }
+ }
+
+ /// Tests where each sequence item have an identical name in an XML.
+ /// That explicitly means that `enum`s as list elements are not supported
+ /// in that case, because enum requires different tags.
+ ///
+ /// (by `enums` we mean [externally tagged enums] is serde terminology)
+ ///
+ /// [externally tagged enums]: https://serde.rs/enum-representations.html#externally-tagged
+ mod fixed_name {
+ use super::*;
+
+ /// This module contains tests where size of the list have a compile-time size
+ mod fixed_size {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ item: [(); 3],
+ }
+
+ /// Simple case: count of elements matches expected size of sequence,
+ /// each element has the same name. Successful deserialization expected
+ #[test]
+ fn simple() {
+ from_str::(
+ r#"
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ /// Special case: empty sequence
+ #[test]
+ #[ignore = "it is impossible to distinguish between missed field and empty list: use `Option<>` or #[serde(default)]"]
+ fn empty() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ item: [(); 0],
+ }
+
+ from_str::(r#" "#).unwrap();
+ from_str::(r#" "#).unwrap();
+ }
+
+ /// Special case: one-element sequence
+ #[test]
+ fn one_element() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ item: [(); 1],
+ }
+
+ from_str::(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ /// Fever elements than expected size of sequence, each element has
+ /// the same name. Failure expected
+ #[test]
+ fn fever_elements() {
+ let data = from_str::(
+ r#"
+
+
+
+
+ "#,
+ );
+
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 2, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected `Err(Custom("invalid length 2, expected an array of length 3"))`, but found {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// More elements than expected size of sequence, each element has
+ /// the same name. Failure expected. If you wish to ignore excess
+ /// elements, use the special type, that consume as much elements
+ /// as possible, but ignores excess elements
+ #[test]
+ fn excess_elements() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `item`"),
+ e => panic!(
+ r#"Expected `Err(Custom("duplicate field `item`"))`, but found {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// Mixed content assumes, that some elements will have an internal
+ /// name `$value`, so, unless field named the same, it is expected
+ /// to fail
+ #[test]
+ fn mixed_content() {
+ let data = from_str::(
+ r#"
+
+
+ text
+
+
+ "#,
+ );
+
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "missing field `item`"),
+ e => panic!(
+ r#"Expected `Err(Custom("missing field `item`"))`, but found {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// In those tests sequence should be deserialized from an XML
+ /// with additional elements that is not defined in the struct.
+ /// That fields should be skipped during deserialization
+ mod unknown_items {
+ use super::*;
+ #[cfg(not(feature = "overlapped-lists"))]
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn before() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn after() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ data.unwrap();
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests non-sequential field is defined in the struct
+ /// before sequential, so it will be deserialized before the list.
+ /// That struct should be deserialized from an XML where these
+ /// fields comes in an arbitrary order
+ mod field_before_list {
+ use super::*;
+ #[cfg(not(feature = "overlapped-lists"))]
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Root {
+ node: (),
+ item: [(); 3],
+ }
+
+ #[test]
+ fn before() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn after() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ data.unwrap();
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests non-sequential field is defined in the struct
+ /// after sequential, so it will be deserialized after the list.
+ /// That struct should be deserialized from an XML where these
+ /// fields comes in an arbitrary order
+ mod field_after_list {
+ use super::*;
+ #[cfg(not(feature = "overlapped-lists"))]
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Root {
+ item: [(); 3],
+ node: (),
+ }
+
+ #[test]
+ fn before() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn after() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ data.unwrap();
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests two lists are deserialized simultaneously.
+ /// Lists should be deserialized even when them overlaps
+ mod two_lists {
+ use super::*;
+ #[cfg(not(feature = "overlapped-lists"))]
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ item: [(); 3],
+ element: [(); 2],
+ }
+
+ #[test]
+ fn splitted() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ data.unwrap();
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// Deserialization of primitives slightly differs from deserialization
+ /// of complex types, so need to check this separately
+ #[test]
+ fn primitives() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ item: [usize; 3],
+ }
+
+ let data: List = from_str(
+ r#"
+
+ - 41
+ - 42
+ - 43
+
+ "#,
+ )
+ .unwrap();
+ assert_eq!(data, List { item: [41, 42, 43] });
+
+ from_str::(
+ r#"
+
+ - 41
+ - 42
+ - 43
+
+ "#,
+ )
+ .unwrap_err();
+ }
+ }
+
+ /// This module contains tests where size of the list have an unspecified size
+ mod variable_size {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ item: Vec<()>,
+ }
+
+ /// Simple case: count of elements matches expected size of sequence,
+ /// each element has the same name. Successful deserialization expected
+ #[test]
+ fn simple() {
+ let data: List = from_str(
+ r#"
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![(), (), ()],
+ }
+ );
+ }
+
+ /// Special case: empty sequence
+ #[test]
+ #[ignore = "it is impossible to distinguish between missed field and empty list: use `Option<>` or #[serde(default)]"]
+ fn empty() {
+ let data: List = from_str(r#" "#).unwrap();
+ assert_eq!(data, List { item: vec![] });
+
+ let data: List = from_str(r#" "#).unwrap();
+ assert_eq!(data, List { item: vec![] });
+ }
+
+ /// Special case: one-element sequence
+ #[test]
+ fn one_element() {
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(data, List { item: vec![()] });
+ }
+
+ /// Mixed content assumes, that some elements will have an internal
+ /// name `$value`, so, unless field named the same, it is expected
+ /// to fail
+ #[test]
+ fn mixed_content() {
+ let data = from_str::(
+ r#"
+
+
+ text
+
+
+ "#,
+ );
+
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "missing field `item`"),
+ e => panic!(
+ r#"Expected `Err(Custom("missing field `item`"))`, but found {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// In those tests sequence should be deserialized from the XML
+ /// with additional elements that is not defined in the struct.
+ /// That fields should be skipped during deserialization
+ mod unknown_items {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[test]
+ fn before() {
+ let data: List = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![(), (), ()],
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: List = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![(), (), ()],
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ List {
+ item: vec![(), (), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `item`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `item`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests non-sequential field is defined in the struct
+ /// before sequential, so it will be deserialized before the list.
+ /// That struct should be deserialized from the XML where these
+ /// fields comes in an arbitrary order
+ mod field_before_list {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Default, Deserialize)]
+ struct Root {
+ node: (),
+ item: Vec<()>,
+ }
+
+ #[test]
+ fn before() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ node: (),
+ item: vec![(), (), ()],
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ node: (),
+ item: vec![(), (), ()],
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Root {
+ node: (),
+ item: vec![(), (), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "duplicate field `item`")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `item`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests non-sequential field is defined in the struct
+ /// after sequential, so it will be deserialized after the list.
+ /// That struct should be deserialized from the XML where these
+ /// fields comes in an arbitrary order
+ mod field_after_list {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Default, Deserialize)]
+ struct Root {
+ item: Vec<()>,
+ node: (),
+ }
+
+ #[test]
+ fn before() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ item: vec![(), (), ()],
+ node: (),
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ item: vec![(), (), ()],
+ node: (),
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Root {
+ item: vec![(), (), ()],
+ node: (),
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "duplicate field `item`")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `item`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests two lists are deserialized simultaneously.
+ /// Lists should be deserialized even when them overlaps
+ mod two_lists {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ item: Vec<()>,
+ element: Vec<()>,
+ }
+
+ #[test]
+ fn splitted() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: vec![(), (), ()],
+ element: vec![(), ()],
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: vec![(), (), ()],
+ element: vec![(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `item`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `item`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// Deserialization of primitives slightly differs from deserialization
+ /// of complex types, so need to check this separately
+ #[test]
+ fn primitives() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ item: Vec,
+ }
+
+ let data: List = from_str(
+ r#"
+
+ - 41
+ - 42
+ - 43
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![41, 42, 43],
+ }
+ );
+
+ from_str::(
+ r#"
+
+ - 41
+ - 42
+ - 43
+
+ "#,
+ )
+ .unwrap_err();
+ }
+ }
+ }
+
+ /// Check that sequences inside element can be deserialized.
+ /// In terms of serde this is a sequence flatten into the struct:
+ ///
+ /// ```ignore
+ /// struct Root {
+ /// #[serde(flatten)]
+ /// items: Vec,
+ /// }
+ /// ```
+ /// except that fact that this is not supported nowadays
+ /// (https://github.com/serde-rs/serde/issues/1905)
+ ///
+ /// Because this is very frequently used pattern in the XML, quick-xml
+ /// have a workaround for this. If a field will have a special name `$value`
+ /// then any `xs:element`s in the `xs:sequence` / `xs:all`, except that
+ /// which name matches the struct name, will be associated with this field:
+ ///
+ /// ```ignore
+ /// struct Root {
+ /// field: U,
+ /// #[serde(rename = "$value")]
+ /// items: Vec,
+ /// }
+ /// ```
+ /// In this example `` tag will be associated with a `field` field,
+ /// but all other tags will be associated with an `items` field. Disadvantages
+ /// of this approach that you can have only one field, but usually you don't
+ /// want more
+ mod variable_name {
+ use super::*;
+ use serde::de::{Deserializer, EnumAccess, VariantAccess, Visitor};
+ use std::fmt::{self, Formatter};
+
+ // NOTE: Derive could be possible once https://github.com/serde-rs/serde/issues/2126 is resolved
+ macro_rules! impl_deserialize_choice {
+ ($name:ident : $(($field:ident, $field_name:literal)),*) => {
+ impl<'de> Deserialize<'de> for $name {
+ fn deserialize(deserializer: D) -> Result
+ where
+ D: Deserializer<'de>,
+ {
+ #[derive(Deserialize)]
+ #[serde(field_identifier)]
+ #[serde(rename_all = "kebab-case")]
+ enum Tag {
+ $($field,)*
+ Other(String),
+ }
+
+ struct EnumVisitor;
+ impl<'de> Visitor<'de> for EnumVisitor {
+ type Value = $name;
+
+ fn expecting(&self, f: &mut Formatter) -> fmt::Result {
+ f.write_str("enum ")?;
+ f.write_str(stringify!($name))
+ }
+
+ fn visit_enum(self, data: A) -> Result
+ where
+ A: EnumAccess<'de>,
+ {
+ match data.variant()? {
+ $(
+ (Tag::$field, variant) => variant.unit_variant().map(|_| $name::$field),
+ )*
+ (Tag::Other(t), v) => v.unit_variant().map(|_| $name::Other(t)),
+ }
+ }
+ }
+
+ const VARIANTS: &'static [&'static str] = &[
+ $($field_name,)*
+ ""
+ ];
+ deserializer.deserialize_enum(stringify!($name), VARIANTS, EnumVisitor)
+ }
+ }
+ };
+ }
+
+ /// Type that can be deserialized from ``, ``, or any other element
+ #[derive(Debug, PartialEq)]
+ enum Choice {
+ One,
+ Two,
+ /// Any other tag name except `One` or `Two`, name of tag stored inside variant
+ Other(String),
+ }
+ impl_deserialize_choice!(Choice: (One, "one"), (Two, "two"));
+
+ /// Type that can be deserialized from ``, ``, or any other element
+ #[derive(Debug, PartialEq)]
+ enum Choice2 {
+ First,
+ Second,
+ /// Any other tag name except `First` or `Second`, name of tag stored inside variant
+ Other(String),
+ }
+ impl_deserialize_choice!(Choice2: (First, "first"), (Second, "second"));
+
+ /// Type that can be deserialized from ``, ``, or any other element.
+ /// Used for `primitives` tests
+ #[derive(Debug, PartialEq, Deserialize)]
+ #[serde(rename_all = "kebab-case")]
+ enum Choice3 {
+ One(usize),
+ Two(String),
+ #[serde(other)]
+ Other,
+ }
+
+ /// This module contains tests where size of the list have a compile-time size
+ mod fixed_size {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: [Choice; 3],
+ }
+
+ /// Simple case: count of elements matches expected size of sequence,
+ /// each element has the same name. Successful deserialization expected
+ #[test]
+ fn simple() {
+ let data: List = from_str(
+ r#"
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ /// Special case: empty sequence
+ #[test]
+ #[ignore = "it is impossible to distinguish between missed field and empty list: use `Option<>` or #[serde(default)]"]
+ fn empty() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: [Choice; 0],
+ }
+
+ from_str::(r#" "#).unwrap();
+ from_str::(r#" "#).unwrap();
+ }
+
+ /// Special case: one-element sequence
+ #[test]
+ fn one_element() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: [Choice; 1],
+ }
+
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: [Choice::One],
+ }
+ );
+ }
+
+ /// Fever elements than expected size of sequence, each element has
+ /// the same name. Failure expected
+ #[test]
+ fn fever_elements() {
+ from_str::(
+ r#"
+
+
+
+
+ "#,
+ )
+ .unwrap_err();
+ }
+
+ /// More elements than expected size of sequence, each element has
+ /// the same name. Failure expected. If you wish to ignore excess
+ /// elements, use the special type, that consume as much elements
+ /// as possible, but ignores excess elements
+ #[test]
+ fn excess_elements() {
+ from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap_err();
+ }
+
+ #[test]
+ fn mixed_content() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: [(); 3],
+ }
+
+ from_str::(
+ r#"
+
+
+ text
+
+
+ "#,
+ )
+ .unwrap();
+ }
+
+ // There cannot be unknown items, because any tag name is accepted
+
+ /// In those tests non-sequential field is defined in the struct
+ /// before sequential, so it will be deserialized before the list.
+ /// That struct should be deserialized from the XML where these
+ /// fields comes in an arbitrary order
+ mod field_before_list {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Root {
+ node: (),
+ #[serde(rename = "$value")]
+ item: [Choice; 3],
+ }
+
+ #[test]
+ fn before() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ node: (),
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ node: (),
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Root {
+ node: (),
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests non-sequential field is defined in the struct
+ /// after sequential, so it will be deserialized after the list.
+ /// That struct should be deserialized from the XML where these
+ /// fields comes in an arbitrary order
+ mod field_after_list {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Root {
+ #[serde(rename = "$value")]
+ item: [Choice; 3],
+ node: (),
+ }
+
+ #[test]
+ fn before() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ node: (),
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ node: (),
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Root {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ node: (),
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests two lists are deserialized simultaneously.
+ /// Lists should be deserialized even when them overlaps
+ mod two_lists {
+ use super::*;
+
+ /// A field with a variable-name items defined before a field with a fixed-name
+ /// items
+ mod choice_and_fixed {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ #[serde(rename = "$value")]
+ item: [Choice; 3],
+ element: [(); 2],
+ }
+
+ /// A list with fixed-name elements located before a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_before() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements located after a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_after() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a fixed-name one
+ #[test]
+ fn overlapped_fixed_before() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 2")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 2")), got {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a variable-name one
+ #[test]
+ fn overlapped_fixed_after() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// A field with a variable-name items defined after a field with a fixed-name
+ /// items
+ mod fixed_and_choice {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ element: [(); 2],
+ #[serde(rename = "$value")]
+ item: [Choice; 3],
+ }
+
+ /// A list with fixed-name elements located before a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_before() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements located after a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_after() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a fixed-name one
+ #[test]
+ fn overlapped_fixed_before() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 2")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 2")), got {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a variable-name one
+ #[test]
+ fn overlapped_fixed_after() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// Tests are ignored, but exists to show a problem.
+ /// May be it will be solved in the future
+ mod choice_and_choice {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ #[serde(rename = "$value")]
+ item: [Choice; 3],
+ // Actually, we cannot rename both fields to `$value`, which is now
+ // required to indicate, that field accepts elements with any name
+ #[serde(rename = "$value")]
+ element: [Choice2; 2],
+ }
+
+ #[test]
+ #[ignore = "There is no way to associate XML elements with `item` or `element` without extra knowledge from type"]
+ fn splitted() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [Choice2::First, Choice2::Second],
+ }
+ );
+ }
+
+ #[test]
+ #[ignore = "There is no way to associate XML elements with `item` or `element` without extra knowledge from type"]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: [Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: [Choice2::First, Choice2::Second],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+ }
+
+ /// Deserialization of primitives slightly differs from deserialization
+ /// of complex types, so need to check this separately
+ #[test]
+ fn primitives() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: [Choice3; 3],
+ }
+
+ let data: List = from_str(
+ r#"
+
+ 41
+ 42
+ 43
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: [
+ Choice3::One(41),
+ Choice3::Two("42".to_string()),
+ Choice3::Other,
+ ],
+ }
+ );
+
+ from_str::(
+ r#"
+
+ 41
+ - 42
+ 43
+
+ "#,
+ )
+ .unwrap_err();
+ }
+ }
+
+ /// This module contains tests where size of the list have an unspecified size
+ mod variable_size {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: Vec,
+ }
+
+ /// Simple case: count of elements matches expected size of sequence,
+ /// each element has the same name. Successful deserialization expected
+ #[test]
+ fn simple() {
+ let data: List = from_str(
+ r#"
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ /// Special case: empty sequence
+ #[test]
+ #[ignore = "it is impossible to distinguish between missed field and empty list: use `Option<>` or #[serde(default)]"]
+ fn empty() {
+ let data = from_str::(r#" "#).unwrap();
+ assert_eq!(data, List { item: vec![] });
+
+ let data = from_str::(r#" "#).unwrap();
+ assert_eq!(data, List { item: vec![] });
+ }
+
+ /// Special case: one-element sequence
+ #[test]
+ fn one_element() {
+ let data: List = from_str(
+ r#"
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![Choice::One],
+ }
+ );
+ }
+
+ #[test]
+ fn mixed_content() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: Vec<()>,
+ }
+
+ let data: List = from_str(
+ r#"
+
+
+ text
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![(), (), ()],
+ }
+ );
+ }
+
+ // There cannot be unknown items, because any tag name is accepted
+
+ /// In those tests non-sequential field is defined in the struct
+ /// before sequential, so it will be deserialized before the list.
+ /// That struct should be deserialized from the XML where these
+ /// fields comes in an arbitrary order
+ mod field_before_list {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Root {
+ node: (),
+ #[serde(rename = "$value")]
+ item: Vec,
+ }
+
+ #[test]
+ fn before() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ node: (),
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ node: (),
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Root {
+ node: (),
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `$value`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `$value`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests non-sequential field is defined in the struct
+ /// after sequential, so it will be deserialized after the list.
+ /// That struct should be deserialized from the XML where these
+ /// fields comes in an arbitrary order
+ mod field_after_list {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Root {
+ #[serde(rename = "$value")]
+ item: Vec,
+ node: (),
+ }
+
+ #[test]
+ fn before() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ node: (),
+ }
+ );
+ }
+
+ #[test]
+ fn after() {
+ let data: Root = from_str(
+ r#"
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Root {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ node: (),
+ }
+ );
+ }
+
+ #[test]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Root {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ node: (),
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `$value`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `$value`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// In those tests two lists are deserialized simultaneously.
+ /// Lists should be deserialized even when them overlaps
+ mod two_lists {
+ use super::*;
+
+ /// A field with a variable-name items defined before a field with a fixed-name
+ /// items
+ mod choice_and_fixed {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ #[serde(rename = "$value")]
+ item: Vec,
+ element: Vec<()>,
+ }
+
+ /// A list with fixed-name elements located before a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_before() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: vec![(), ()],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements located after a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_after() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: vec![(), ()],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a fixed-name one
+ #[test]
+ fn overlapped_fixed_before() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: vec![(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `element`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `element`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a variable-name one
+ #[test]
+ fn overlapped_fixed_after() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: vec![(), ()],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `$value`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `$value`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// A field with a variable-name items defined after a field with a fixed-name
+ /// items
+ mod fixed_and_choice {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ element: Vec<()>,
+ #[serde(rename = "$value")]
+ item: Vec,
+ }
+
+ /// A list with fixed-name elements located before a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_before() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ element: vec![(), ()],
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements located after a list with variable-name
+ /// elements in an XML
+ #[test]
+ fn fixed_after() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ element: vec![(), ()],
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a fixed-name one
+ #[test]
+ fn overlapped_fixed_before() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ element: vec![(), ()],
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `element`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `element`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+
+ /// A list with fixed-name elements are mixed with a list with variable-name
+ /// elements in an XML, and the first element is a variable-name one
+ #[test]
+ fn overlapped_fixed_after() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ element: vec![(), ()],
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => assert_eq!(e, "duplicate field `$value`"),
+ e => panic!(
+ r#"Expected Err(Custom("duplicate field `$value`")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+
+ /// Tests are ignored, but exists to show a problem.
+ /// May be it will be solved in the future
+ mod choice_and_choice {
+ use super::*;
+ use pretty_assertions::assert_eq;
+
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct Pair {
+ #[serde(rename = "$value")]
+ item: Vec,
+ // Actually, we cannot rename both fields to `$value`, which is now
+ // required to indicate, that field accepts elements with any name
+ #[serde(rename = "$value")]
+ element: Vec,
+ }
+
+ #[test]
+ #[ignore = "There is no way to associate XML elements with `item` or `element` without extra knowledge from type"]
+ fn splitted() {
+ let data: Pair = from_str(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ Pair {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: vec![Choice2::First, Choice2::Second],
+ }
+ );
+ }
+
+ #[test]
+ #[ignore = "There is no way to associate XML elements with `item` or `element` without extra knowledge from type"]
+ fn overlapped() {
+ let data = from_str::(
+ r#"
+
+
+
+
+
+
+
+ "#,
+ );
+
+ #[cfg(feature = "overlapped-lists")]
+ assert_eq!(
+ data.unwrap(),
+ Pair {
+ item: vec![Choice::One, Choice::Two, Choice::Other("three".into())],
+ element: vec![Choice2::First, Choice2::Second],
+ }
+ );
+
+ #[cfg(not(feature = "overlapped-lists"))]
+ match data {
+ Err(DeError::Custom(e)) => {
+ assert_eq!(e, "invalid length 1, expected an array of length 3")
+ }
+ e => panic!(
+ r#"Expected Err(Custom("invalid length 1, expected an array of length 3")), got {:?}"#,
+ e
+ ),
+ }
+ }
+ }
+ }
+
+ /// Deserialization of primitives slightly differs from deserialization
+ /// of complex types, so need to check this separately
+ #[test]
+ fn primitives() {
+ #[derive(Debug, PartialEq, Deserialize)]
+ struct List {
+ #[serde(rename = "$value")]
+ item: Vec,
+ }
+
+ let data: List = from_str(
+ r#"
+
+ 41
+ 42
+ 43
+
+ "#,
+ )
+ .unwrap();
+
+ assert_eq!(
+ data,
+ List {
+ item: vec![
+ Choice3::One(41),
+ Choice3::Two("42".to_string()),
+ Choice3::Other,
+ ],
+ }
+ );
+
+ from_str::(
+ r#"
+
+ 41
+ - 42
+ 43
+
+ "#,
+ )
+ .unwrap_err();
+ }
+ }
+ }
+}
+
macro_rules! maplike_errors {
($type:ty) => {
mod non_closed {