Skip to content

Commit

Permalink
Merge pull request #393 from Mingun/qname
Browse files Browse the repository at this point in the history
Introduce typified wrappers for names and fix couple of bugs
  • Loading branch information
Mingun committed Jun 13, 2022
2 parents 0a42987 + bac96d1 commit 711525e
Show file tree
Hide file tree
Showing 20 changed files with 1,202 additions and 507 deletions.
16 changes: 16 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

- [#387]: Allow overlapping between elements of sequence and other elements
(using new feature `overlapped-lists`)
- [#393]: New module `name` with `QName`, `LocalName`, `Namespace`, `Prefix`
and `PrefixDeclaration` wrappers around byte arrays and `ResolveResult` with
the result of namespace resolution

### Bug Fixes

Expand All @@ -23,6 +26,9 @@
- [#387]: Internal deserializer state can be broken when deserializing a map with
a sequence field (such as `Vec<T>`), where elements of this sequence contains
another sequence. This error affects only users with the `serialize` feature enabled
- [#393]: Now `event_namespace`, `attribute_namespace` and `read_event_namespaced`
returns `ResolveResult::Unknown` if prefix was not registered in namespace buffer
- [#393]: Fix breaking processing after encounter an attribute with a reserved name (started with "xmlns")

### Misc Changes

Expand All @@ -42,15 +48,25 @@

- [#391]: Added code coverage

- [#393]: `event_namespace` and `attribute_namespace` now accept `QName`
and returns `ResolveResult` and `LocalName`, `read_event_namespaced` now
returns `ResolveResult` instead of `Option<[u8]>`
- [#393]: Types of `Attribute::key` and `Attr::key()` changed to `QName`
- [#393]: Now `BytesStart::name()` and `BytesEnd::name()` returns `QName`, and
`BytesStart::local_name()` and `BytesEnd::local_name()` returns `LocalName`

### New Tests

- [#9]: Added tests for incorrect nested tags in input
- [#387]: Added a bunch of tests for sequences deserialization
- [#393]: Added more tests for namespace resolver
- [#393]: Added tests for reserved names (started with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
[#393]: https://github.com/tafia/quick-xml/pull/393

## 0.23.0 -- 2022-05-08

Expand Down
2 changes: 1 addition & 1 deletion examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
}
}
Ok(Event::Start(ref e)) => match e.name() {
Ok(Event::Start(ref e)) => match e.name().as_ref() {
b"test" => println!(
"attributes values: {:?}",
e.attributes()
Expand Down
19 changes: 12 additions & 7 deletions examples/issue68.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#![allow(unused)]

use quick_xml::events::Event;
use quick_xml::name::Namespace;
use quick_xml::Reader;
use std::convert::TryFrom;
use std::io::Read;

struct Resource {
Expand Down Expand Up @@ -81,9 +83,11 @@ fn parse_report(xml_data: &str) -> Vec<Resource> {

loop {
match reader.read_namespaced_event(&mut buf, &mut ns_buffer) {
Ok((namespace_value, Event::Start(e))) => {
let namespace_value = namespace_value.unwrap_or_default();
match (depth, state, namespace_value, e.local_name()) {
Ok((ns, Event::Start(e))) => {
let ns = Option::<Namespace>::try_from(ns)
.unwrap_or_default() // Treat unknown prefixes as not bound to any namespace
.unwrap_or(Namespace(b""));
match (depth, state, ns.as_ref(), e.local_name().as_ref()) {
(0, State::Root, b"DAV:", b"multistatus") => state = State::MultiStatus,
(1, State::MultiStatus, b"DAV:", b"response") => {
state = State::Response;
Expand All @@ -96,10 +100,11 @@ fn parse_report(xml_data: &str) -> Vec<Resource> {
}
depth += 1;
}
Ok((namespace_value, Event::End(e))) => {
let namespace_value = namespace_value.unwrap_or_default();
let local_name = e.local_name();
match (depth, state, &*namespace_value, local_name) {
Ok((ns, Event::End(e))) => {
let ns = Option::<Namespace>::try_from(ns)
.unwrap_or_default() // Treat unknown prefixes as not bound to any namespace
.unwrap_or(Namespace(b""));
match (depth, state, ns.as_ref(), e.local_name().as_ref()) {
(1, State::MultiStatus, b"DAV:", b"multistatus") => state = State::Root,
(2, State::MultiStatus, b"DAV:", b"multistatus") => state = State::MultiStatus,
_ => {}
Expand Down
12 changes: 7 additions & 5 deletions examples/nested_readers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ fn main() -> Result<(), quick_xml::Error> {
let mut found_tables = Vec::new();
loop {
match reader.read_event(&mut buf)? {
Event::Start(element) => match element.name() {
Event::Start(element) => match element.name().as_ref() {
b"w:tbl" => {
count += 1;
let mut stats = TableStat {
Expand All @@ -34,19 +34,21 @@ fn main() -> Result<(), quick_xml::Error> {
loop {
skip_buf.clear();
match reader.read_event(&mut skip_buf)? {
Event::Start(element) => match element.name() {
Event::Start(element) => match element.name().as_ref() {
b"w:tr" => {
stats.rows.push(vec![]);
row_index = stats.rows.len() - 1;
}
b"w:tc" => {
stats.rows[row_index]
.push(String::from_utf8(element.name().to_vec()).unwrap());
stats.rows[row_index].push(
String::from_utf8(element.name().as_ref().to_vec())
.unwrap(),
);
}
_ => {}
},
Event::End(element) => {
if element.name() == b"w:tbl" {
if element.name().as_ref() == b"w:tbl" {
found_tables.push(stats);
break;
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ fn main() {

loop {
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"tag2" => {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
txt.push(
reader
.read_text(b"tag2", &mut Vec::new())
Expand Down
6 changes: 3 additions & 3 deletions src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ where
let key = if let Some(p) = self
.unflatten_fields
.iter()
.position(|f| e.name() == &f[UNFLATTEN_PREFIX.len()..])
.position(|f| e.name().as_ref() == &f[UNFLATTEN_PREFIX.len()..])
{
// Used to deserialize elements, like:
// <root>
Expand All @@ -290,7 +290,7 @@ where
// }
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
} else {
let name = Cow::Borrowed(e.local_name());
let name = Cow::Borrowed(e.local_name().into_inner());
seed.deserialize(EscapedDeserializer::new(name, decoder, false))
};
key.map(Some)
Expand Down Expand Up @@ -606,7 +606,7 @@ where
// Stop iteration after reaching a closing tag
DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None),
// This is a unmatched closing tag, so the XML is invalid
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
// We cannot get `Eof` legally, because we always inside of the
// opened tag `self.map.start`
DeEvent::Eof => Err(DeError::UnexpectedEof),
Expand Down
51 changes: 29 additions & 22 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ pub use crate::errors::serialize::DeError;
use crate::{
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
reader::Decoder,
Reader,
};
Expand Down Expand Up @@ -514,16 +515,16 @@ where
match self.write.back() {
// Skip all subtree, if we skip a start event
Some(DeEvent::Start(e)) => {
let end = e.name().to_owned();
let end = e.name().as_ref().to_owned();
let mut depth = 0;
loop {
let event = self.next()?;
match event {
DeEvent::Start(ref e) if e.name() == end => {
DeEvent::Start(ref e) if e.name().as_ref() == end => {
self.skip_event(event)?;
depth += 1;
}
DeEvent::End(ref e) if e.name() == end => {
DeEvent::End(ref e) if e.name().as_ref() == end => {
self.skip_event(event)?;
if depth == 0 {
return Ok(());
Expand Down Expand Up @@ -571,7 +572,9 @@ where
let e = self.next()?;
match e {
DeEvent::Start(e) => return Ok(Some(e)),
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
DeEvent::End(e) => {
return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned()))
}
DeEvent::Eof => return Ok(None),
_ => (), // ignore texts
}
Expand Down Expand Up @@ -631,20 +634,24 @@ where
DeEvent::Text(t) if unescape => t.unescape()?,
DeEvent::Text(t) => BytesCData::new(t.into_inner()),
DeEvent::CData(t) => t,
DeEvent::Start(s) => return Err(DeError::UnexpectedStart(s.name().to_owned())),
DeEvent::Start(s) => {
return Err(DeError::UnexpectedStart(s.name().as_ref().to_owned()))
}
// We can get End event in case of `<tag></tag>` or `<tag/>` input
// Return empty text in that case
DeEvent::End(end) if end.name() == e.name() => {
return Ok(BytesCData::new(&[] as &[u8]));
}
DeEvent::End(end) => return Err(DeError::UnexpectedEnd(end.name().to_owned())),
DeEvent::End(end) => {
return Err(DeError::UnexpectedEnd(end.name().as_ref().to_owned()))
}
DeEvent::Eof => return Err(DeError::UnexpectedEof),
};
self.read_to_end(e.name())?;
Ok(t)
}
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().to_owned())),
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
DeEvent::Start(e) => Err(DeError::UnexpectedStart(e.name().as_ref().to_owned())),
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
DeEvent::Eof => Err(DeError::UnexpectedEof),
}
}
Expand All @@ -658,7 +665,7 @@ where
/// Drops all events until event with [name](BytesEnd::name()) `name` won't be
/// dropped. This method should be called after [`Self::next()`]
#[cfg(feature = "overlapped-lists")]
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
let mut depth = 0;
loop {
match self.read.pop_front() {
Expand All @@ -682,7 +689,7 @@ where
}
}
#[cfg(not(feature = "overlapped-lists"))]
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
// First one might be in self.peek
match self.next()? {
DeEvent::Start(e) => self.reader.read_to_end(e.name())?,
Expand Down Expand Up @@ -751,10 +758,10 @@ where
{
// Try to go to the next `<tag ...>...</tag>` or `<tag .../>`
if let Some(e) = self.next_start()? {
let name = e.name().to_vec();
let name = e.name().as_ref().to_vec();
let map = map::MapAccess::new(self, e, fields)?;
let value = visitor.visit_map(map)?;
self.read_to_end(&name)?;
self.read_to_end(QName(&name))?;
Ok(value)
} else {
Err(DeError::ExpectedStart)
Expand Down Expand Up @@ -789,7 +796,7 @@ where
visitor.visit_unit()
}
DeEvent::Text(_) | DeEvent::CData(_) => visitor.visit_unit(),
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().to_owned())),
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
DeEvent::Eof => Err(DeError::UnexpectedEof),
}
}
Expand Down Expand Up @@ -893,7 +900,7 @@ where
{
match self.next()? {
DeEvent::Start(e) => self.read_to_end(e.name())?,
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().to_owned())),
DeEvent::End(e) => return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
DeEvent::Eof => return Err(DeError::UnexpectedEof),
_ => (),
}
Expand Down Expand Up @@ -925,7 +932,7 @@ pub trait XmlRead<'i> {

/// Skips until end element is found. Unlike `next()` it will not allocate
/// when it cannot satisfy the lifetime.
fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError>;
fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

/// A copy of the reader's decoder used to decode strings.
fn decoder(&self) -> Decoder;
Expand Down Expand Up @@ -960,7 +967,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
event
}

fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
match self.reader.read_to_end(name, &mut self.buf) {
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
other => Ok(other?),
Expand Down Expand Up @@ -996,7 +1003,7 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
}
}

fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> {
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
match self.reader.read_to_end_unbuffered(name) {
Err(Error::UnexpectedEof(_)) => Err(DeError::UnexpectedEof),
other => Ok(other?),
Expand Down Expand Up @@ -1212,7 +1219,7 @@ mod tests {
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"target"))
);
de.read_to_end(b"target").unwrap();
de.read_to_end(QName(b"target")).unwrap();
assert_eq!(de.read, vec![]);
assert_eq!(
de.write,
Expand Down Expand Up @@ -1252,7 +1259,7 @@ mod tests {
de.next().unwrap(),
Start(BytesStart::borrowed_name(b"skip"))
);
de.read_to_end(b"skip").unwrap();
de.read_to_end(QName(b"skip")).unwrap();

assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
}
Expand Down Expand Up @@ -1313,7 +1320,7 @@ mod tests {
de.next().unwrap(),
Start(BytesStart::borrowed(br#"tag a="1""#, 3))
);
assert_eq!(de.read_to_end(b"tag").unwrap(), ());
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());

assert_eq!(
de.next().unwrap(),
Expand All @@ -1329,7 +1336,7 @@ mod tests {
de.next().unwrap(),
Start(BytesStart::borrowed(b"self-closed", 11))
);
assert_eq!(de.read_to_end(b"self-closed").unwrap(), ());
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());

assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root")));
assert_eq!(de.next().unwrap(), Eof);
Expand Down Expand Up @@ -1432,7 +1439,7 @@ mod tests {
reader.next().unwrap(),
DeEvent::Start(BytesStart::borrowed(b"item ", 4))
);
reader.read_to_end(b"item").unwrap();
reader.read_to_end(QName(b"item")).unwrap();
assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}

Expand Down
4 changes: 2 additions & 2 deletions src/de/seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ pub fn not_in(
decoder: Decoder,
) -> Result<bool, DeError> {
#[cfg(not(feature = "encoding"))]
let tag = Cow::Borrowed(decoder.decode(start.name())?);
let tag = Cow::Borrowed(decoder.decode(start.name().into_inner())?);

#[cfg(feature = "encoding")]
let tag = decoder.decode(start.name());
let tag = decoder.decode(start.name().into_inner());

Ok(fields.iter().all(|&field| field != tag.as_ref()))
}
Expand Down
4 changes: 3 additions & 1 deletion src/de/var.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ where
DeEvent::Text(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, true),
// Escape sequences does not processed inside CDATA section
DeEvent::CData(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, false),
DeEvent::Start(e) => EscapedDeserializer::new(Cow::Borrowed(e.name()), decoder, false),
DeEvent::Start(e) => {
EscapedDeserializer::new(Cow::Borrowed(e.name().into_inner()), decoder, false)
}
_ => {
return Err(DeError::Unsupported(
"Invalid event for Enum, expecting `Text` or `Start`",
Expand Down
Loading

0 comments on commit 711525e

Please sign in to comment.