Skip to content

Commit

Permalink
Add functions for attribute value normalization
Browse files Browse the repository at this point in the history
closes tafia#371
  • Loading branch information
dralley committed Jan 31, 2023
1 parent add7406 commit 7f55cd8
Show file tree
Hide file tree
Showing 8 changed files with 503 additions and 9 deletions.
9 changes: 9 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

- [#541]: Deserialize specially named `$text` enum variant in [externally tagged]
enums from textual content
- [#379]: Improved compliance with the XML attribute value normalization process by
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
which ought to be used in place of `Attribute::unescape_value()` and
`Attribute::unescape_value_with()`

### Bug Fixes

Expand All @@ -22,10 +26,15 @@

### Misc Changes

### New Tests

- [#379]: Added tests for attribute value normalization

[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
[#490]: https://github.com/tafia/quick-xml/pull/490
[#537]: https://github.com/tafia/quick-xml/issues/537
[#541]: https://github.com/tafia/quick-xml/pull/541
[#379]: https://github.com/tafia/quick-xml/pull/379

## 0.27.1 -- 2022-12-28

Expand Down
3 changes: 1 addition & 2 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ static INPUTS: &[(&str, &str)] = &[
("players.xml", PLAYERS),
];

// TODO: use fully normalized attribute values
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
let mut r = Reader::from_str(doc);
loop {
match criterion::black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand Down
45 changes: 45 additions & 0 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,50 @@ fn attributes(c: &mut Criterion) {
assert_eq!(count, 150);
})
});

group.finish();
}

/// Benchmarks processing of sample attribute values.
///
/// All cases are registered in the `attribute_value_normalization` group and
/// feed fixed string literals to `unescape`:
///
/// - `noop_short` / `noop_long`: text without any `&` references
/// - `replacement_chars`: text containing a literal `\n` and `\t`
/// - `char_reference`: decimal (`&#34;`) and hexadecimal (`&#x22;`) references
/// - `entity_reference`: named references (`&gt;`, `&amp;`, `&lt;`, `&quot;`, `&apos;`)
///
/// NOTE(review): the group is named after normalization, yet every case calls
/// `unescape` -- confirm that this is the function intended to be measured.
fn attribute_value_normalization(c: &mut Criterion) {
    /// Processes `text` once. The result goes through `black_box` so the call
    /// cannot be optimized away; an `Err` result panics the benchmark.
    fn run(text: &str) {
        criterion::black_box(unescape(text)).unwrap();
    }

    let mut group = c.benchmark_group("attribute_value_normalization");

    // Inputs without any references
    group.bench_function("noop_short", |b| b.iter(|| run("foobar")));
    group.bench_function("noop_long", |b| {
        b.iter(|| run("just a bit of text without any entities"))
    });

    // Input with literal newline and tab characters
    group.bench_function("replacement_chars", |b| {
        b.iter(|| run("just a bit\n of text without\tany entities"))
    });

    // Numeric character references: both inputs are processed in each iteration
    group.bench_function("char_reference", |b| {
        b.iter(|| {
            run("prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;");
            run("&#38;&#60;");
        })
    });

    // Named entity references: both inputs are processed in each iteration
    group.bench_function("entity_reference", |b| {
        b.iter(|| {
            run("age &gt; 72 &amp;&amp; age &lt; 21");
            run("&quot;what&apos;s that?&quot;");
        })
    });

    group.finish();
}

Expand Down Expand Up @@ -354,6 +398,7 @@ criterion_group!(
read_resolved_event_into,
one_event,
attributes,
attribute_value_normalization,
escaping,
unescaping,
);
Expand Down
1 change: 1 addition & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ impl From<EscapeError> for Error {
}

impl From<AttrError> for Error {
/// Creates a new `Error::InvalidAttr` from the given error
#[inline]
fn from(error: AttrError) -> Self {
Error::InvalidAttr(error)
Expand Down
Loading

0 comments on commit 7f55cd8

Please sign in to comment.