quick_xml/events/attributes.rs
1//! Xml Attributes module
2//!
3//! Provides an iterator over attributes key/value pairs
4
5use crate::encoding::Decoder;
6use crate::errors::Result as XmlResult;
7use crate::escape::{escape, resolve_predefined_entity, unescape_with};
8use crate::name::{LocalName, Namespace, QName};
9use crate::reader::NsReader;
10use crate::utils::{is_whitespace, Bytes};
11
12use std::fmt::{self, Debug, Display, Formatter};
13use std::iter::FusedIterator;
14use std::{borrow::Cow, ops::Range};
15
16/// A struct representing a key/value XML attribute.
17///
18/// Field `value` stores raw bytes, possibly containing escape-sequences. Most users will likely
19/// want to access the value using one of the [`unescape_value`] and [`decode_and_unescape_value`]
20/// functions.
21///
22/// [`unescape_value`]: Self::unescape_value
23/// [`decode_and_unescape_value`]: Self::decode_and_unescape_value
24#[derive(Clone, Eq, PartialEq)]
25pub struct Attribute<'a> {
26 /// The key to uniquely define the attribute.
27 ///
28 /// If [`Attributes::with_checks`] is turned off, the key might not be unique.
29 pub key: QName<'a>,
30 /// The raw value of the attribute.
31 pub value: Cow<'a, [u8]>,
32}
33
34impl<'a> Attribute<'a> {
35 /// Decodes using UTF-8 then unescapes the value.
36 ///
37 /// This is normally the value you are interested in. Escape sequences such as `>` are
38 /// replaced with their unescaped equivalents such as `>`.
39 ///
40 /// This will allocate if the value contains any escape sequences.
41 ///
42 /// See also [`unescape_value_with()`](Self::unescape_value_with)
43 ///
44 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
45 ///
46 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
47 /// should only be used by applications.
48 /// Libs should use [`decode_and_unescape_value()`](Self::decode_and_unescape_value)
49 /// instead, because if lib will be used in a project which depends on quick_xml with
50 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
51 ///
52 /// </div>
53 ///
54 /// [`encoding`]: ../../index.html#encoding
55 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
56 #[cfg(any(doc, not(feature = "encoding")))]
57 pub fn unescape_value(&self) -> XmlResult<Cow<'a, str>> {
58 self.unescape_value_with(resolve_predefined_entity)
59 }
60
61 /// Decodes using UTF-8 then unescapes the value, using custom entities.
62 ///
63 /// This is normally the value you are interested in. Escape sequences such as `>` are
64 /// replaced with their unescaped equivalents such as `>`.
65 /// A fallback resolver for additional custom entities can be provided via
66 /// `resolve_entity`.
67 ///
68 /// This will allocate if the value contains any escape sequences.
69 ///
70 /// See also [`unescape_value()`](Self::unescape_value)
71 ///
72 /// <div style="background:rgba(120,145,255,0.45);padding:0.75em;">
73 ///
74 /// NOTE: Because this method is available only if [`encoding`] feature is **not** enabled,
75 /// should only be used by applications.
76 /// Libs should use [`decode_and_unescape_value_with()`](Self::decode_and_unescape_value_with)
77 /// instead, because if lib will be used in a project which depends on quick_xml with
78 /// [`encoding`] feature enabled, the lib will fail to compile due to [feature unification].
79 ///
80 /// </div>
81 ///
82 /// [`encoding`]: ../../index.html#encoding
83 /// [feature unification]: https://doc.rust-lang.org/cargo/reference/features.html#feature-unification
84 #[cfg(any(doc, not(feature = "encoding")))]
85 #[inline]
86 pub fn unescape_value_with<'entity>(
87 &self,
88 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
89 ) -> XmlResult<Cow<'a, str>> {
90 self.decode_and_unescape_value_with(Decoder::utf8(), resolve_entity)
91 }
92
93 /// Decodes then unescapes the value.
94 ///
95 /// This will allocate if the value contains any escape sequences or in
96 /// non-UTF-8 encoding.
97 pub fn decode_and_unescape_value(&self, decoder: Decoder) -> XmlResult<Cow<'a, str>> {
98 self.decode_and_unescape_value_with(decoder, resolve_predefined_entity)
99 }
100
101 /// Decodes then unescapes the value with custom entities.
102 ///
103 /// This will allocate if the value contains any escape sequences or in
104 /// non-UTF-8 encoding.
105 pub fn decode_and_unescape_value_with<'entity>(
106 &self,
107 decoder: Decoder,
108 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
109 ) -> XmlResult<Cow<'a, str>> {
110 let decoded = decoder.decode_cow(&self.value)?;
111
112 match unescape_with(&decoded, resolve_entity)? {
113 // Because result is borrowed, no replacements was done and we can use original string
114 Cow::Borrowed(_) => Ok(decoded),
115 Cow::Owned(s) => Ok(s.into()),
116 }
117 }
118
119 /// If attribute value [represents] valid boolean values, returns `Some`, otherwise returns `None`.
120 ///
121 /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
122 ///
123 /// # Examples
124 ///
125 /// ```
126 /// # use pretty_assertions::assert_eq;
127 /// use quick_xml::events::attributes::Attribute;
128 ///
129 /// let attr = Attribute::from(("attr", "false"));
130 /// assert_eq!(attr.as_bool(), Some(false));
131 ///
132 /// let attr = Attribute::from(("attr", "0"));
133 /// assert_eq!(attr.as_bool(), Some(false));
134 ///
135 /// let attr = Attribute::from(("attr", "true"));
136 /// assert_eq!(attr.as_bool(), Some(true));
137 ///
138 /// let attr = Attribute::from(("attr", "1"));
139 /// assert_eq!(attr.as_bool(), Some(true));
140 ///
141 /// let attr = Attribute::from(("attr", "bot bool"));
142 /// assert_eq!(attr.as_bool(), None);
143 /// ```
144 ///
145 /// [represents]: https://www.w3.org/TR/xmlschema11-2/#boolean
146 #[inline]
147 pub fn as_bool(&self) -> Option<bool> {
148 match self.value.as_ref() {
149 b"1" | b"true" => Some(true),
150 b"0" | b"false" => Some(false),
151 _ => None,
152 }
153 }
154}
155
156impl<'a> Debug for Attribute<'a> {
157 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
158 f.debug_struct("Attribute")
159 .field("key", &Bytes(self.key.as_ref()))
160 .field("value", &Bytes(&self.value))
161 .finish()
162 }
163}
164
165impl<'a> From<(&'a [u8], &'a [u8])> for Attribute<'a> {
166 /// Creates new attribute from raw bytes.
167 /// Does not apply any transformation to both key and value.
168 ///
169 /// # Examples
170 ///
171 /// ```
172 /// # use pretty_assertions::assert_eq;
173 /// use quick_xml::events::attributes::Attribute;
174 ///
175 /// let features = Attribute::from(("features".as_bytes(), "Bells & whistles".as_bytes()));
176 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
177 /// ```
178 fn from(val: (&'a [u8], &'a [u8])) -> Attribute<'a> {
179 Attribute {
180 key: QName(val.0),
181 value: Cow::from(val.1),
182 }
183 }
184}
185
186impl<'a> From<(&'a str, &'a str)> for Attribute<'a> {
187 /// Creates new attribute from text representation.
188 /// Key is stored as-is, but the value will be escaped.
189 ///
190 /// # Examples
191 ///
192 /// ```
193 /// # use pretty_assertions::assert_eq;
194 /// use quick_xml::events::attributes::Attribute;
195 ///
196 /// let features = Attribute::from(("features", "Bells & whistles"));
197 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
198 /// ```
199 fn from(val: (&'a str, &'a str)) -> Attribute<'a> {
200 Attribute {
201 key: QName(val.0.as_bytes()),
202 value: match escape(val.1) {
203 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
204 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
205 },
206 }
207 }
208}
209
210impl<'a> From<(&'a str, Cow<'a, str>)> for Attribute<'a> {
211 /// Creates new attribute from text representation.
212 /// Key is stored as-is, but the value will be escaped.
213 ///
214 /// # Examples
215 ///
216 /// ```
217 /// # use std::borrow::Cow;
218 /// use pretty_assertions::assert_eq;
219 /// use quick_xml::events::attributes::Attribute;
220 ///
221 /// let features = Attribute::from(("features", Cow::Borrowed("Bells & whistles")));
222 /// assert_eq!(features.value, "Bells & whistles".as_bytes());
223 /// ```
224 fn from(val: (&'a str, Cow<'a, str>)) -> Attribute<'a> {
225 Attribute {
226 key: QName(val.0.as_bytes()),
227 value: match escape(val.1) {
228 Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
229 Cow::Owned(s) => Cow::Owned(s.into_bytes()),
230 },
231 }
232 }
233}
234
235impl<'a> From<Attr<&'a [u8]>> for Attribute<'a> {
236 #[inline]
237 fn from(attr: Attr<&'a [u8]>) -> Self {
238 Self {
239 key: attr.key(),
240 value: Cow::Borrowed(attr.value()),
241 }
242 }
243}
244
245////////////////////////////////////////////////////////////////////////////////////////////////////
246
247/// Iterator over XML attributes.
248///
249/// Yields `Result<Attribute>`. An `Err` will be yielded if an attribute is malformed or duplicated.
250/// The duplicate check can be turned off by calling [`with_checks(false)`].
251///
252/// When [`serialize`] feature is enabled, can be converted to serde's deserializer.
253///
254/// [`with_checks(false)`]: Self::with_checks
255/// [`serialize`]: ../../index.html#serialize
256#[derive(Clone)]
257pub struct Attributes<'a> {
258 /// Slice of `BytesStart` corresponding to attributes
259 bytes: &'a [u8],
260 /// Iterator state, independent from the actual source of bytes
261 state: IterState,
262 /// Encoding used for `bytes`
263 decoder: Decoder,
264}
265
266impl<'a> Attributes<'a> {
267 /// Internal constructor, used by `BytesStart`. Supplies data in reader's encoding
268 #[inline]
269 pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool, decoder: Decoder) -> Self {
270 Self {
271 bytes: buf,
272 state: IterState::new(pos, html),
273 decoder,
274 }
275 }
276
277 /// Creates a new attribute iterator from a buffer, which recognizes only XML-style
278 /// attributes, i. e. those which in the form `name = "value"` or `name = 'value'`.
279 /// HTML style attributes (i. e. without quotes or only name) will return a error.
280 ///
281 /// # Parameters
282 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
283 /// string between `<` and `>` (or `/>`) of a tag;
284 /// - `pos`: a position in the `buf` where tag name is finished and attributes
285 /// is started. It is not necessary to point exactly to the end of a tag name,
286 /// although that is usually that. If it will be more than the `buf` length,
287 /// then the iterator will return `None`` immediately.
288 ///
289 /// # Example
290 /// ```
291 /// # use quick_xml::events::attributes::{Attribute, Attributes};
292 /// # use pretty_assertions::assert_eq;
293 /// #
294 /// let mut iter = Attributes::new("tag-name attr1 = 'value1' attr2='value2' ", 9);
295 /// // ^0 ^9
296 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
297 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "value2")))));
298 /// assert_eq!(iter.next(), None);
299 /// ```
300 pub const fn new(buf: &'a str, pos: usize) -> Self {
301 Self::wrap(buf.as_bytes(), pos, false, Decoder::utf8())
302 }
303
304 /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax.
305 ///
306 /// # Parameters
307 /// - `buf`: a buffer with a tag name and attributes, usually this is the whole
308 /// string between `<` and `>` (or `/>`) of a tag;
309 /// - `pos`: a position in the `buf` where tag name is finished and attributes
310 /// is started. It is not necessary to point exactly to the end of a tag name,
311 /// although that is usually that. If it will be more than the `buf` length,
312 /// then the iterator will return `None`` immediately.
313 ///
314 /// # Example
315 /// ```
316 /// # use quick_xml::events::attributes::{Attribute, Attributes};
317 /// # use pretty_assertions::assert_eq;
318 /// #
319 /// let mut iter = Attributes::html("tag-name attr1 = value1 attr2 ", 9);
320 /// // ^0 ^9
321 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr1", "value1")))));
322 /// assert_eq!(iter.next(), Some(Ok(Attribute::from(("attr2", "")))));
323 /// assert_eq!(iter.next(), None);
324 /// ```
325 pub const fn html(buf: &'a str, pos: usize) -> Self {
326 Self::wrap(buf.as_bytes(), pos, true, Decoder::utf8())
327 }
328
329 /// Changes whether attributes should be checked for uniqueness.
330 ///
331 /// The XML specification requires attribute keys in the same element to be unique. This check
332 /// can be disabled to improve performance slightly.
333 ///
334 /// (`true` by default)
335 pub fn with_checks(&mut self, val: bool) -> &mut Attributes<'a> {
336 self.state.check_duplicates = val;
337 self
338 }
339
340 /// Checks if the current tag has a [`xsi:nil`] attribute. This method ignores any errors in
341 /// attributes.
342 ///
343 /// # Examples
344 ///
345 /// ```
346 /// # use pretty_assertions::assert_eq;
347 /// use quick_xml::events::Event;
348 /// use quick_xml::name::QName;
349 /// use quick_xml::reader::NsReader;
350 ///
351 /// let mut reader = NsReader::from_str("
352 /// <root xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'>
353 /// <true xsi:nil='true'/>
354 /// <false xsi:nil='false'/>
355 /// <none/>
356 /// <non-xsi xsi:nil='true' xmlns:xsi='namespace'/>
357 /// <unbound-nil nil='true' xmlns='http://www.w3.org/2001/XMLSchema-instance'/>
358 /// <another-xmlns f:nil='true' xmlns:f='http://www.w3.org/2001/XMLSchema-instance'/>
359 /// </root>
360 /// ");
361 /// reader.config_mut().trim_text(true);
362 ///
363 /// macro_rules! check {
364 /// ($reader:expr, $name:literal, $value:literal) => {
365 /// let event = match $reader.read_event().unwrap() {
366 /// Event::Empty(e) => e,
367 /// e => panic!("Unexpected event {:?}", e),
368 /// };
369 /// assert_eq!(
370 /// (event.name(), event.attributes().has_nil(&$reader)),
371 /// (QName($name.as_bytes()), $value),
372 /// );
373 /// };
374 /// }
375 ///
376 /// let root = match reader.read_event().unwrap() {
377 /// Event::Start(e) => e,
378 /// e => panic!("Unexpected event {:?}", e),
379 /// };
380 /// assert_eq!(root.attributes().has_nil(&reader), false);
381 ///
382 /// // definitely true
383 /// check!(reader, "true", true);
384 /// // definitely false
385 /// check!(reader, "false", false);
386 /// // absence of the attribute means that attribute is not set
387 /// check!(reader, "none", false);
388 /// // attribute not bound to the correct namespace
389 /// check!(reader, "non-xsi", false);
390 /// // attributes without prefix not bound to any namespace
391 /// check!(reader, "unbound-nil", false);
392 /// // prefix can be any while it is bound to the correct namespace
393 /// check!(reader, "another-xmlns", true);
394 /// ```
395 ///
396 /// [`xsi:nil`]: https://www.w3.org/TR/xmlschema-1/#xsi_nil
397 pub fn has_nil<R>(&mut self, reader: &NsReader<R>) -> bool {
398 use crate::name::ResolveResult::*;
399
400 self.any(|attr| {
401 if let Ok(attr) = attr {
402 match reader.resolve_attribute(attr.key) {
403 (
404 Bound(Namespace(b"http://www.w3.org/2001/XMLSchema-instance")),
405 LocalName(b"nil"),
406 ) => attr.as_bool().unwrap_or_default(),
407 _ => false,
408 }
409 } else {
410 false
411 }
412 })
413 }
414
415 /// Get the decoder, used to decode bytes, read by the reader which produces
416 /// this iterator, to the strings.
417 ///
418 /// When iterator was created manually or get from a manually created [`BytesStart`],
419 /// encoding is UTF-8.
420 ///
421 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
422 /// defaults to UTF-8.
423 ///
424 /// [`BytesStart`]: crate::events::BytesStart
425 /// [`encoding`]: ../index.html#encoding
426 #[inline]
427 pub const fn decoder(&self) -> Decoder {
428 self.decoder
429 }
430}
431
432impl<'a> Debug for Attributes<'a> {
433 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
434 f.debug_struct("Attributes")
435 .field("bytes", &Bytes(&self.bytes))
436 .field("state", &self.state)
437 .field("decoder", &self.decoder)
438 .finish()
439 }
440}
441
442impl<'a> Iterator for Attributes<'a> {
443 type Item = Result<Attribute<'a>, AttrError>;
444
445 #[inline]
446 fn next(&mut self) -> Option<Self::Item> {
447 match self.state.next(self.bytes) {
448 None => None,
449 Some(Ok(a)) => Some(Ok(a.map(|range| &self.bytes[range]).into())),
450 Some(Err(e)) => Some(Err(e)),
451 }
452 }
453}
454
455impl<'a> FusedIterator for Attributes<'a> {}
456
457////////////////////////////////////////////////////////////////////////////////////////////////////
458
/// Errors that can occur while parsing attributes.
///
/// The recovery position shown in the examples is the position from which
/// parsing of the next attribute will be attempted.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AttrError {
    /// An attribute key is not followed by `=`. Carries the position relative
    /// to the start of the owning tag.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key another="attribute"/>
    /// <!--     ^~~ error position, recovery position (8) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    ExpectedEq(usize),
    /// No attribute value was found after `=`. Carries the position relative
    /// to the start of the owning tag.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = />
    /// <!--       ^~~ error position, recovery position (10) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because otherwise any content after `=` will be treated as a value.
    /// The XML
    ///
    /// ```xml
    /// <tag key = another-key = "value"/>
    /// <!--                   ^ ^- recovery position (24) -->
    /// <!--                   '~~ error position (22) -->
    /// ```
    ///
    /// will be treated as `Attribute { key = b"key", value = b"another-key" }`,
    /// so either an [`Attribute`] is returned or [`AttrError::UnquotedValue`]
    /// is raised, depending on the parsing mode.
    ExpectedValue(usize),
    /// An attribute value is not quoted. Carries the position relative to the
    /// start of the owning tag.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = value />
    /// <!--       ^    ^~~ recovery position (15) -->
    /// <!--       '~~ error position (10) -->
    /// ```
    ///
    /// This error can be raised only when the iterator is in XML mode.
    UnquotedValue(usize),
    /// An attribute value is not terminated by a matching quote. Carries the
    /// position relative to the start of the owning tag and the expected quote.
    /// That position is always a last character in the tag content.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = "value />
    /// <tag key = 'value />
    /// <!--               ^~~ error position, recovery position (18) -->
    /// ```
    ///
    /// This error can be returned only for the last attribute in the list,
    /// because all input was consumed while scanning for the quote.
    ExpectedQuote(usize, u8),
    /// An attribute with the same name has already been seen. The two fields
    /// are (1) the error position of the new attribute, relative to the start
    /// of the owning tag, and (2) the start position of the previously seen
    /// attribute with the same name.
    ///
    /// Example of input that raises this error:
    ///
    /// ```xml
    /// <tag key = 'value'  key="value2" attr3='value3' />
    /// <!-- ^              ^            ^~~ recovery position (32) -->
    /// <!-- |              '~~ error position (19) -->
    /// <!-- '~~ previous position (4) -->
    /// ```
    ///
    /// This error is returned only when [`Attributes::with_checks()`] is set
    /// to `true` (which is the default).
    Duplicated(usize, usize),
}

impl Display for AttrError {
    /// Writes a human-readable message that starts with the error position.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match *self {
            Self::Duplicated(current, previous) => write!(
                f,
                "position {}: duplicated attribute, previous declaration at position {}",
                current, previous
            ),
            Self::ExpectedQuote(at, quote) => write!(
                f,
                "position {}: missing closing quote `{}` in attribute value",
                at,
                char::from(quote)
            ),
            Self::UnquotedValue(at) => write!(
                f,
                "position {}: attribute value must be enclosed in `\"` or `'`",
                at
            ),
            Self::ExpectedValue(at) => write!(
                f,
                "position {}: `=` must be followed by an attribute value",
                at
            ),
            Self::ExpectedEq(at) => write!(
                f,
                "position {}: attribute key must be directly followed by `=` or space",
                at
            ),
        }
    }
}

impl std::error::Error for AttrError {}
581
582////////////////////////////////////////////////////////////////////////////////////////////////////
583
/// A key/value XML or HTML [attribute], tagged with the way its value was written.
///
/// [attribute]: https://www.w3.org/TR/xml11/#NT-Attribute
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Attr<T> {
    /// An attribute whose value is enclosed in double quotes (`"`). Holds the
    /// key and the value. This is a canonical XML-style attribute.
    DoubleQ(T, T),
    /// An attribute whose value is enclosed in single quotes (`'`). Holds the
    /// key and the value. This is an XML-style attribute.
    SingleQ(T, T),
    /// An attribute whose value is not enclosed in quotes. Holds the key and
    /// the value. This is an HTML-style attribute that can be returned in
    /// HTML-mode parsing only; in XML mode [`AttrError::UnquotedValue`] is
    /// raised instead.
    ///
    /// The value can be invalid according to the [HTML specification]; in
    /// particular, it can contain `"`, `'`, `=`, `<`, and <code>`</code>
    /// characters. The absence of the `>` character is nevertheless guaranteed,
    /// since the parser extracts [events] based on them even before the start
    /// of parsing attributes.
    ///
    /// [HTML specification]: https://html.spec.whatwg.org/#unquoted
    /// [events]: crate::events::Event::Start
    Unquoted(T, T),
    /// An attribute without a value. Holds the key only. This is an HTML-style
    /// attribute that can be returned in HTML-mode parsing only; in XML mode
    /// [`AttrError::ExpectedEq`] is raised instead.
    Empty(T),
}

impl<T> Attr<T> {
    /// Converts an `Attr<T>` into an `Attr<U>` by applying `f` to the key and
    /// then, when there is one, to the value. The variant is kept.
    #[inline]
    pub fn map<U, F>(self, mut f: F) -> Attr<U>
    where
        F: FnMut(T) -> U,
    {
        match self {
            Self::Empty(name) => Attr::Empty(f(name)),
            Self::Unquoted(name, content) => {
                let name = f(name);
                Attr::Unquoted(name, f(content))
            }
            Self::SingleQ(name, content) => {
                let name = f(name);
                Attr::SingleQ(name, f(content))
            }
            Self::DoubleQ(name, content) => {
                let name = f(name);
                Attr::DoubleQ(name, f(content))
            }
        }
    }
}
630
631impl<'a> Attr<&'a [u8]> {
632 /// Returns the key value
633 #[inline]
634 pub const fn key(&self) -> QName<'a> {
635 QName(match self {
636 Attr::DoubleQ(key, _) => key,
637 Attr::SingleQ(key, _) => key,
638 Attr::Empty(key) => key,
639 Attr::Unquoted(key, _) => key,
640 })
641 }
642 /// Returns the attribute value. For [`Self::Empty`] variant an empty slice
643 /// is returned according to the [HTML specification].
644 ///
645 /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty
646 #[inline]
647 pub const fn value(&self) -> &'a [u8] {
648 match self {
649 Attr::DoubleQ(_, value) => value,
650 Attr::SingleQ(_, value) => value,
651 Attr::Empty(_) => &[],
652 Attr::Unquoted(_, value) => value,
653 }
654 }
655}
656
657impl<T: AsRef<[u8]>> Debug for Attr<T> {
658 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
659 match self {
660 Attr::DoubleQ(key, value) => f
661 .debug_tuple("Attr::DoubleQ")
662 .field(&Bytes(key.as_ref()))
663 .field(&Bytes(value.as_ref()))
664 .finish(),
665 Attr::SingleQ(key, value) => f
666 .debug_tuple("Attr::SingleQ")
667 .field(&Bytes(key.as_ref()))
668 .field(&Bytes(value.as_ref()))
669 .finish(),
670 Attr::Empty(key) => f
671 .debug_tuple("Attr::Empty")
672 // Comment to prevent formatting and keep style consistent
673 .field(&Bytes(key.as_ref()))
674 .finish(),
675 Attr::Unquoted(key, value) => f
676 .debug_tuple("Attr::Unquoted")
677 .field(&Bytes(key.as_ref()))
678 .field(&Bytes(value.as_ref()))
679 .finish(),
680 }
681 }
682}
683
684/// Unpacks attribute key and value into tuple of this two elements.
685/// `None` value element is returned only for [`Attr::Empty`] variant.
686impl<T> From<Attr<T>> for (T, Option<T>) {
687 #[inline]
688 fn from(attr: Attr<T>) -> Self {
689 match attr {
690 Attr::DoubleQ(key, value) => (key, Some(value)),
691 Attr::SingleQ(key, value) => (key, Some(value)),
692 Attr::Empty(key) => (key, None),
693 Attr::Unquoted(key, value) => (key, Some(value)),
694 }
695 }
696}
697
698////////////////////////////////////////////////////////////////////////////////////////////////////
699
700type AttrResult = Result<Attr<Range<usize>>, AttrError>;
701
702#[derive(Clone, Copy, Debug)]
703enum State {
704 /// Iteration finished, iterator will return `None` to all [`IterState::next`]
705 /// requests.
706 Done,
707 /// The last attribute returned was deserialized successfully. Contains an
708 /// offset from which next attribute should be searched.
709 Next(usize),
710 /// The last attribute returns [`AttrError::UnquotedValue`], offset pointed
711 /// to the beginning of the value. Recover should skip a value
712 SkipValue(usize),
713 /// The last attribute returns [`AttrError::Duplicated`], offset pointed to
714 /// the equal (`=`) sign. Recover should skip it and a value
715 SkipEqValue(usize),
716}
717
718/// External iterator over spans of attribute key and value
719#[derive(Clone, Debug)]
720pub(crate) struct IterState {
721 /// Iteration state that determines what actions should be done before the
722 /// actual parsing of the next attribute
723 state: State,
724 /// If `true`, enables ability to parse unquoted values and key-only (empty)
725 /// attributes
726 html: bool,
727 /// If `true`, checks for duplicate names
728 check_duplicates: bool,
729 /// If `check_duplicates` is set, contains the ranges of already parsed attribute
730 /// names. We store a ranges instead of slices to able to report a previous
731 /// attribute position
732 keys: Vec<Range<usize>>,
733}
734
735impl IterState {
736 pub const fn new(offset: usize, html: bool) -> Self {
737 Self {
738 state: State::Next(offset),
739 html,
740 check_duplicates: true,
741 keys: Vec::new(),
742 }
743 }
744
745 /// Recover from an error that could have been made on a previous step.
746 /// Returns an offset from which parsing should continue.
747 /// If there no input left, returns `None`.
748 fn recover(&self, slice: &[u8]) -> Option<usize> {
749 match self.state {
750 State::Done => None,
751 State::Next(offset) => Some(offset),
752 State::SkipValue(offset) => self.skip_value(slice, offset),
753 State::SkipEqValue(offset) => self.skip_eq_value(slice, offset),
754 }
755 }
756
757 /// Skip all characters up to first space symbol or end-of-input
758 #[inline]
759 #[allow(clippy::manual_map)]
760 fn skip_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
761 let mut iter = (offset..).zip(slice[offset..].iter());
762
763 match iter.find(|(_, &b)| is_whitespace(b)) {
764 // Input: ` key = value `
765 // | ^
766 // offset e
767 Some((e, _)) => Some(e),
768 // Input: ` key = value`
769 // | ^
770 // offset e = len()
771 None => None,
772 }
773 }
774
775 /// Skip all characters up to first space symbol or end-of-input
776 #[inline]
777 fn skip_eq_value(&self, slice: &[u8], offset: usize) -> Option<usize> {
778 let mut iter = (offset..).zip(slice[offset..].iter());
779
780 // Skip all up to the quote and get the quote type
781 let quote = match iter.find(|(_, &b)| !is_whitespace(b)) {
782 // Input: ` key = "`
783 // | ^
784 // offset
785 Some((_, b'"')) => b'"',
786 // Input: ` key = '`
787 // | ^
788 // offset
789 Some((_, b'\'')) => b'\'',
790
791 // Input: ` key = x`
792 // | ^
793 // offset
794 Some((offset, _)) => return self.skip_value(slice, offset),
795 // Input: ` key = `
796 // | ^
797 // offset
798 None => return None,
799 };
800
801 match iter.find(|(_, &b)| b == quote) {
802 // Input: ` key = " "`
803 // ^
804 Some((e, b'"')) => Some(e),
805 // Input: ` key = ' '`
806 // ^
807 Some((e, _)) => Some(e),
808
809 // Input: ` key = " `
810 // Input: ` key = ' `
811 // ^
812 // Closing quote not found
813 None => None,
814 }
815 }
816
817 #[inline]
818 fn check_for_duplicates(
819 &mut self,
820 slice: &[u8],
821 key: Range<usize>,
822 ) -> Result<Range<usize>, AttrError> {
823 if self.check_duplicates {
824 if let Some(prev) = self
825 .keys
826 .iter()
827 .find(|r| slice[(*r).clone()] == slice[key.clone()])
828 {
829 return Err(AttrError::Duplicated(key.start, prev.start));
830 }
831 self.keys.push(key.clone());
832 }
833 Ok(key)
834 }
835
836 /// # Parameters
837 ///
838 /// - `slice`: content of the tag, used for checking for duplicates
839 /// - `key`: Range of key in slice, if iterator in HTML mode
840 /// - `offset`: Position of error if iterator in XML mode
841 #[inline]
842 fn key_only(&mut self, slice: &[u8], key: Range<usize>, offset: usize) -> Option<AttrResult> {
843 Some(if self.html {
844 self.check_for_duplicates(slice, key).map(Attr::Empty)
845 } else {
846 Err(AttrError::ExpectedEq(offset))
847 })
848 }
849
850 #[inline]
851 fn double_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
852 self.state = State::Next(value.end + 1); // +1 for `"`
853
854 Some(Ok(Attr::DoubleQ(key, value)))
855 }
856
857 #[inline]
858 fn single_q(&mut self, key: Range<usize>, value: Range<usize>) -> Option<AttrResult> {
859 self.state = State::Next(value.end + 1); // +1 for `'`
860
861 Some(Ok(Attr::SingleQ(key, value)))
862 }
863
864 pub fn next(&mut self, slice: &[u8]) -> Option<AttrResult> {
865 let mut iter = match self.recover(slice) {
866 Some(offset) => (offset..).zip(slice[offset..].iter()),
867 None => return None,
868 };
869
870 // Index where next key started
871 let start_key = match iter.find(|(_, &b)| !is_whitespace(b)) {
872 // Input: ` key`
873 // ^
874 Some((s, _)) => s,
875 // Input: ` `
876 // ^
877 None => {
878 // Because we reach end-of-input, stop iteration on next call
879 self.state = State::Done;
880 return None;
881 }
882 };
883 // Span of a key
884 let (key, offset) = match iter.find(|(_, &b)| b == b'=' || is_whitespace(b)) {
885 // Input: ` key=`
886 // | ^
887 // s e
888 Some((e, b'=')) => (start_key..e, e),
889
890 // Input: ` key `
891 // ^
892 Some((e, _)) => match iter.find(|(_, &b)| !is_whitespace(b)) {
893 // Input: ` key =`
894 // | | ^
895 // start_key e
896 Some((offset, b'=')) => (start_key..e, offset),
897 // Input: ` key x`
898 // | | ^
899 // start_key e
900 // If HTML-like attributes is allowed, this is the result, otherwise error
901 Some((offset, _)) => {
902 // In any case, recovering is not required
903 self.state = State::Next(offset);
904 return self.key_only(slice, start_key..e, offset);
905 }
906 // Input: ` key `
907 // | | ^
908 // start_key e
909 // If HTML-like attributes is allowed, this is the result, otherwise error
910 None => {
911 // Because we reach end-of-input, stop iteration on next call
912 self.state = State::Done;
913 return self.key_only(slice, start_key..e, slice.len());
914 }
915 },
916
917 // Input: ` key`
918 // | ^
919 // s e = len()
920 // If HTML-like attributes is allowed, this is the result, otherwise error
921 None => {
922 // Because we reach end-of-input, stop iteration on next call
923 self.state = State::Done;
924 let e = slice.len();
925 return self.key_only(slice, start_key..e, e);
926 }
927 };
928
929 let key = match self.check_for_duplicates(slice, key) {
930 Err(e) => {
931 self.state = State::SkipEqValue(offset);
932 return Some(Err(e));
933 }
934 Ok(key) => key,
935 };
936
937 ////////////////////////////////////////////////////////////////////////
938
939 // Gets the position of quote and quote type
940 let (start_value, quote) = match iter.find(|(_, &b)| !is_whitespace(b)) {
941 // Input: ` key = "`
942 // ^
943 Some((s, b'"')) => (s + 1, b'"'),
944 // Input: ` key = '`
945 // ^
946 Some((s, b'\'')) => (s + 1, b'\''),
947
948 // Input: ` key = x`
949 // ^
950 // If HTML-like attributes is allowed, this is the start of the value
951 Some((s, _)) if self.html => {
952 // We do not check validity of attribute value characters as required
953 // according to https://html.spec.whatwg.org/#unquoted. It can be done
954 // during validation phase
955 let end = match iter.find(|(_, &b)| is_whitespace(b)) {
956 // Input: ` key = value `
957 // | ^
958 // s e
959 Some((e, _)) => e,
960 // Input: ` key = value`
961 // | ^
962 // s e = len()
963 None => slice.len(),
964 };
965 self.state = State::Next(end);
966 return Some(Ok(Attr::Unquoted(key, s..end)));
967 }
968 // Input: ` key = x`
969 // ^
970 Some((s, _)) => {
971 self.state = State::SkipValue(s);
972 return Some(Err(AttrError::UnquotedValue(s)));
973 }
974
975 // Input: ` key = `
976 // ^
977 None => {
978 // Because we reach end-of-input, stop iteration on next call
979 self.state = State::Done;
980 return Some(Err(AttrError::ExpectedValue(slice.len())));
981 }
982 };
983
984 match iter.find(|(_, &b)| b == quote) {
985 // Input: ` key = " "`
986 // ^
987 Some((e, b'"')) => self.double_q(key, start_value..e),
988 // Input: ` key = ' '`
989 // ^
990 Some((e, _)) => self.single_q(key, start_value..e),
991
992 // Input: ` key = " `
993 // Input: ` key = ' `
994 // ^
995 // Closing quote not found
996 None => {
997 // Because we reach end-of-input, stop iteration on next call
998 self.state = State::Done;
999 Some(Err(AttrError::ExpectedQuote(slice.len(), quote)))
1000 }
1001 }
1002 }
1003}
1004
1005////////////////////////////////////////////////////////////////////////////////////////////////////
1006
/// Exercises parsing of XML-style attributes: every attribute is expected to
/// carry a value wrapped in single or double quotes.
#[cfg(test)]
mod xml {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Builds the item the iterator is expected to yield for a successfully
    /// parsed attribute with the given raw `key` and raw `value`.
    fn attr<'a>(key: &'a [u8], value: &'a [u8]) -> Option<Result<Attribute<'a>, AttrError>> {
        Some(Ok(Attribute {
            key: QName(key),
            value: Cow::Borrowed(value),
        }))
    }

    /// The attribute under test is the only attribute of the tag
    mod single {
        use super::*;
        use pretty_assertions::assert_eq;

        /// The value is wrapped in single quotes
        #[test]
        fn single_quoted() {
            let mut attrs = Attributes::new(r#"tag key='value'"#, 3);

            assert_eq!(attrs.next(), attr(b"key", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is wrapped in double quotes
        #[test]
        fn double_quoted() {
            let mut attrs = Attributes::new(r#"tag key="value""#, 3);

            assert_eq!(attrs.next(), attr(b"key", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is present, but not wrapped in quotes
        #[test]
        fn unquoted() {
            // `value` starts at offset 8
            let mut attrs = Attributes::new(r#"tag key=value"#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(8))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// There is a key, but neither `=` nor a value
        #[test]
        fn key_only() {
            // The input ends at offset 7, right after `key`
            let mut attrs = Attributes::new(r#"tag key"#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(7))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key begins with an invalid symbol (a single quote here).
        /// Keys and values are not validated while parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_start_invalid() {
            let mut attrs = Attributes::new(r#"tag 'key'='value'"#, 3);

            assert_eq!(attrs.next(), attr(b"'key'", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key has an invalid symbol inside (an ampersand here).
        /// Keys and values are not validated while parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_contains_invalid() {
            let mut attrs = Attributes::new(r#"tag key&jey='value'"#, 3);

            assert_eq!(attrs.next(), attr(b"key&jey", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// Nothing follows the `=`
        #[test]
        fn missed_value() {
            // The input ends at offset 8, right after `=`
            let mut attrs = Attributes::new(r#"tag key="#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedValue(8))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }
    }

    /// The attribute under test is the first one in a list of several attributes
    mod first {
        use super::*;
        use pretty_assertions::assert_eq;

        /// The value is wrapped in single quotes
        #[test]
        fn single_quoted() {
            let mut attrs = Attributes::new(r#"tag key='value' regular='attribute'"#, 3);

            assert_eq!(attrs.next(), attr(b"key", b"value"));
            assert_eq!(attrs.next(), attr(b"regular", b"attribute"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is wrapped in double quotes
        #[test]
        fn double_quoted() {
            let mut attrs = Attributes::new(r#"tag key="value" regular='attribute'"#, 3);

            assert_eq!(attrs.next(), attr(b"key", b"value"));
            assert_eq!(attrs.next(), attr(b"regular", b"attribute"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is present, but not wrapped in quotes
        #[test]
        fn unquoted() {
            // `value` starts at offset 8
            let mut attrs = Attributes::new(r#"tag key=value regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(8))));
            // The iterator must recover and yield the following attribute
            assert_eq!(attrs.next(), attr(b"regular", b"attribute"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// There is a key, but neither `=` nor a value
        #[test]
        fn key_only() {
            // `regular` starts at offset 8, where `=` was expected
            let mut attrs = Attributes::new(r#"tag key regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(8))));
            // The iterator must recover and yield the following attribute
            assert_eq!(attrs.next(), attr(b"regular", b"attribute"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key begins with an invalid symbol (a single quote here).
        /// Keys and values are not validated while parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_start_invalid() {
            let mut attrs = Attributes::new(r#"tag 'key'='value' regular='attribute'"#, 3);

            assert_eq!(attrs.next(), attr(b"'key'", b"value"));
            assert_eq!(attrs.next(), attr(b"regular", b"attribute"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key has an invalid symbol inside (an ampersand here).
        /// Keys and values are not validated while parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_contains_invalid() {
            let mut attrs = Attributes::new(r#"tag key&jey='value' regular='attribute'"#, 3);

            assert_eq!(attrs.next(), attr(b"key&jey", b"value"));
            assert_eq!(attrs.next(), attr(b"regular", b"attribute"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value after `=` is missing, so the text that follows is taken
        /// for the value
        #[test]
        fn missed_value() {
            // `regular` starts at offset 9
            let mut attrs = Attributes::new(r#"tag key= regular='attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(9))));
            // Keys and values are not validated while parsing, therefore
            // "regular='attribute'" is treated as an unquoted attribute value,
            // skipped during recovery, and the iteration is finished
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);

            ////////////////////////////////////////////////////////////////////

            // `regular=` starts at offset 9, the input ends at offset 29
            let mut attrs = Attributes::new(r#"tag key= regular= 'attribute'"#, 3);

            // Here "regular=" is treated as an unquoted value
            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(9))));
            // Here "'attribute'" is treated as a key, because key names are
            // not validated
            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(29))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);

            ////////////////////////////////////////////////////////////////////

            // `regular` starts at offset 9, the input ends at offset 29
            let mut attrs = Attributes::new(r#"tag key= regular ='attribute'"#, 3);

            // Here "regular" is treated as an unquoted value
            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(9))));
            // Here "='attribute'" is treated as a key, because key names are
            // not validated
            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(29))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);

            ////////////////////////////////////////////////////////////////////

            // `regular` starts at offset 9, `'attribute'` starts at offset 19,
            // the input ends at offset 30
            let mut attrs = Attributes::new(r#"tag key= regular = 'attribute'"#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(9))));
            // Here the second "=" is treated as a key, because key names are
            // not validated
            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(19))));
            // Here "'attribute'" is treated as a key, because key names are
            // not validated
            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(30))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }
    }

    /// Same cases as in `single`, but with extra spaces in the markup
    mod sparsed {
        use super::*;
        use pretty_assertions::assert_eq;

        /// The value is wrapped in single quotes
        #[test]
        fn single_quoted() {
            let mut attrs = Attributes::new(r#"tag key = 'value' "#, 3);

            assert_eq!(attrs.next(), attr(b"key", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is wrapped in double quotes
        #[test]
        fn double_quoted() {
            let mut attrs = Attributes::new(r#"tag key = "value" "#, 3);

            assert_eq!(attrs.next(), attr(b"key", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The value is present, but not wrapped in quotes
        #[test]
        fn unquoted() {
            // `value` starts at offset 10
            let mut attrs = Attributes::new(r#"tag key = value "#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(10))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// There is a key, but neither `=` nor a value
        #[test]
        fn key_only() {
            // The input ends at offset 8
            let mut attrs = Attributes::new(r#"tag key "#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(8))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key begins with an invalid symbol (a single quote here).
        /// Keys and values are not validated while parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_start_invalid() {
            let mut attrs = Attributes::new(r#"tag 'key' = 'value' "#, 3);

            assert_eq!(attrs.next(), attr(b"'key'", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// The key has an invalid symbol inside (an ampersand here).
        /// Keys and values are not validated while parsing, so the invalid
        /// attribute is returned as is
        #[test]
        fn key_contains_invalid() {
            let mut attrs = Attributes::new(r#"tag key&jey = 'value' "#, 3);

            assert_eq!(attrs.next(), attr(b"key&jey", b"value"));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }

        /// Nothing but whitespace follows the `=`
        #[test]
        fn missed_value() {
            // The input ends at offset 10
            let mut attrs = Attributes::new(r#"tag key = "#, 3);

            assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedValue(10))));
            assert_eq!(attrs.next(), None);
            assert_eq!(attrs.next(), None);
        }
    }

    /// Verifies that duplicated attributes are reported and that the iterator
    /// is able to continue after such a report
    mod duplicated {
        use super::*;

        /// The check for duplicated names is left enabled
        mod with_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// The duplicate has a value wrapped in single quotes
            #[test]
            fn single_quoted() {
                // The first `key` starts at offset 4, the second one at offset 16
                let mut attrs = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value wrapped in double quotes
            #[test]
            fn double_quoted() {
                // The first `key` starts at offset 4, the second one at offset 16
                let mut attrs = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value that is not wrapped in quotes
            #[test]
            fn unquoted() {
                // The first `key` starts at offset 4, the second one at offset 16
                let mut attrs = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), Some(Err(AttrError::Duplicated(16, 4))));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate consists of a key only
            #[test]
            fn key_only() {
                // `another` starts at offset 20, where `=` was expected
                let mut attrs = Attributes::new(r#"tag key='value' key another=''"#, 3);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(20))));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }
        }

        /// The check for duplicated names is switched off
        mod without_check {
            use super::*;
            use pretty_assertions::assert_eq;

            /// The duplicate has a value wrapped in single quotes
            #[test]
            fn single_quoted() {
                let mut attrs = Attributes::new(r#"tag key='value' key='dup' another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), attr(b"key", b"dup"));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value wrapped in double quotes
            #[test]
            fn double_quoted() {
                let mut attrs = Attributes::new(r#"tag key='value' key="dup" another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), attr(b"key", b"dup"));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate has a value that is not wrapped in quotes
            #[test]
            fn unquoted() {
                // `dup` starts at offset 20
                let mut attrs = Attributes::new(r#"tag key='value' key=dup another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), Some(Err(AttrError::UnquotedValue(20))));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }

            /// The duplicate consists of a key only
            #[test]
            fn key_only() {
                // `another` starts at offset 20, where `=` was expected
                let mut attrs = Attributes::new(r#"tag key='value' key another=''"#, 3);
                attrs.with_checks(false);

                assert_eq!(attrs.next(), attr(b"key", b"value"));
                assert_eq!(attrs.next(), Some(Err(AttrError::ExpectedEq(20))));
                assert_eq!(attrs.next(), attr(b"another", b""));
                assert_eq!(attrs.next(), None);
                assert_eq!(attrs.next(), None);
            }
        }
    }

    /// Both quote kinds are used in one tag, and a value wrapped in one kind
    /// of quote contains the other kind
    #[test]
    fn mixed_quote() {
        let mut attrs = Attributes::new(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

        assert_eq!(attrs.next(), attr(b"a", b"a"));
        assert_eq!(attrs.next(), attr(b"b", b"b"));
        assert_eq!(attrs.next(), attr(b"c", br#"cc"cc"#));
        assert_eq!(attrs.next(), attr(b"d", b"dd'dd"));
        assert_eq!(attrs.next(), None);
        assert_eq!(attrs.next(), None);
    }
}
1690
/// Checks how parsing of HTML-style attributes works. Each attribute can take
/// one of three forms:
/// - XML-like: has a value, enclosed in single or double quotes
/// - has a value that is not enclosed in quotes
/// - without a value, key only
1696#[cfg(test)]
1697mod html {
1698 use super::*;
1699 use pretty_assertions::assert_eq;
1700
1701 /// Checked attribute is the single attribute
1702 mod single {
1703 use super::*;
1704 use pretty_assertions::assert_eq;
1705
1706 /// Attribute have a value enclosed in single quotes
1707 #[test]
1708 fn single_quoted() {
1709 let mut iter = Attributes::html(r#"tag key='value'"#, 3);
1710
1711 assert_eq!(
1712 iter.next(),
1713 Some(Ok(Attribute {
1714 key: QName(b"key"),
1715 value: Cow::Borrowed(b"value"),
1716 }))
1717 );
1718 assert_eq!(iter.next(), None);
1719 assert_eq!(iter.next(), None);
1720 }
1721
1722 /// Attribute have a value enclosed in double quotes
1723 #[test]
1724 fn double_quoted() {
1725 let mut iter = Attributes::html(r#"tag key="value""#, 3);
1726
1727 assert_eq!(
1728 iter.next(),
1729 Some(Ok(Attribute {
1730 key: QName(b"key"),
1731 value: Cow::Borrowed(b"value"),
1732 }))
1733 );
1734 assert_eq!(iter.next(), None);
1735 assert_eq!(iter.next(), None);
1736 }
1737
1738 /// Attribute have a value, not enclosed in quotes
1739 #[test]
1740 fn unquoted() {
1741 let mut iter = Attributes::html(r#"tag key=value"#, 3);
1742
1743 assert_eq!(
1744 iter.next(),
1745 Some(Ok(Attribute {
1746 key: QName(b"key"),
1747 value: Cow::Borrowed(b"value"),
1748 }))
1749 );
1750 assert_eq!(iter.next(), None);
1751 assert_eq!(iter.next(), None);
1752 }
1753
1754 /// Only attribute key is present
1755 #[test]
1756 fn key_only() {
1757 let mut iter = Attributes::html(r#"tag key"#, 3);
1758
1759 assert_eq!(
1760 iter.next(),
1761 Some(Ok(Attribute {
1762 key: QName(b"key"),
1763 value: Cow::Borrowed(&[]),
1764 }))
1765 );
1766 assert_eq!(iter.next(), None);
1767 assert_eq!(iter.next(), None);
1768 }
1769
1770 /// Key is started with an invalid symbol (a single quote in this test).
1771 /// Because we do not check validity of keys and values during parsing,
1772 /// that invalid attribute will be returned
1773 #[test]
1774 fn key_start_invalid() {
1775 let mut iter = Attributes::html(r#"tag 'key'='value'"#, 3);
1776
1777 assert_eq!(
1778 iter.next(),
1779 Some(Ok(Attribute {
1780 key: QName(b"'key'"),
1781 value: Cow::Borrowed(b"value"),
1782 }))
1783 );
1784 assert_eq!(iter.next(), None);
1785 assert_eq!(iter.next(), None);
1786 }
1787
1788 /// Key contains an invalid symbol (an ampersand in this test).
1789 /// Because we do not check validity of keys and values during parsing,
1790 /// that invalid attribute will be returned
1791 #[test]
1792 fn key_contains_invalid() {
1793 let mut iter = Attributes::html(r#"tag key&jey='value'"#, 3);
1794
1795 assert_eq!(
1796 iter.next(),
1797 Some(Ok(Attribute {
1798 key: QName(b"key&jey"),
1799 value: Cow::Borrowed(b"value"),
1800 }))
1801 );
1802 assert_eq!(iter.next(), None);
1803 assert_eq!(iter.next(), None);
1804 }
1805
1806 /// Attribute value is missing after `=`
1807 #[test]
1808 fn missed_value() {
1809 let mut iter = Attributes::html(r#"tag key="#, 3);
1810 // 0 ^ = 8
1811
1812 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(8))));
1813 assert_eq!(iter.next(), None);
1814 assert_eq!(iter.next(), None);
1815 }
1816 }
1817
1818 /// Checked attribute is the first attribute in the list of many attributes
1819 mod first {
1820 use super::*;
1821 use pretty_assertions::assert_eq;
1822
1823 /// Attribute have a value enclosed in single quotes
1824 #[test]
1825 fn single_quoted() {
1826 let mut iter = Attributes::html(r#"tag key='value' regular='attribute'"#, 3);
1827
1828 assert_eq!(
1829 iter.next(),
1830 Some(Ok(Attribute {
1831 key: QName(b"key"),
1832 value: Cow::Borrowed(b"value"),
1833 }))
1834 );
1835 assert_eq!(
1836 iter.next(),
1837 Some(Ok(Attribute {
1838 key: QName(b"regular"),
1839 value: Cow::Borrowed(b"attribute"),
1840 }))
1841 );
1842 assert_eq!(iter.next(), None);
1843 assert_eq!(iter.next(), None);
1844 }
1845
1846 /// Attribute have a value enclosed in double quotes
1847 #[test]
1848 fn double_quoted() {
1849 let mut iter = Attributes::html(r#"tag key="value" regular='attribute'"#, 3);
1850
1851 assert_eq!(
1852 iter.next(),
1853 Some(Ok(Attribute {
1854 key: QName(b"key"),
1855 value: Cow::Borrowed(b"value"),
1856 }))
1857 );
1858 assert_eq!(
1859 iter.next(),
1860 Some(Ok(Attribute {
1861 key: QName(b"regular"),
1862 value: Cow::Borrowed(b"attribute"),
1863 }))
1864 );
1865 assert_eq!(iter.next(), None);
1866 assert_eq!(iter.next(), None);
1867 }
1868
1869 /// Attribute have a value, not enclosed in quotes
1870 #[test]
1871 fn unquoted() {
1872 let mut iter = Attributes::html(r#"tag key=value regular='attribute'"#, 3);
1873
1874 assert_eq!(
1875 iter.next(),
1876 Some(Ok(Attribute {
1877 key: QName(b"key"),
1878 value: Cow::Borrowed(b"value"),
1879 }))
1880 );
1881 assert_eq!(
1882 iter.next(),
1883 Some(Ok(Attribute {
1884 key: QName(b"regular"),
1885 value: Cow::Borrowed(b"attribute"),
1886 }))
1887 );
1888 assert_eq!(iter.next(), None);
1889 assert_eq!(iter.next(), None);
1890 }
1891
1892 /// Only attribute key is present
1893 #[test]
1894 fn key_only() {
1895 let mut iter = Attributes::html(r#"tag key regular='attribute'"#, 3);
1896
1897 assert_eq!(
1898 iter.next(),
1899 Some(Ok(Attribute {
1900 key: QName(b"key"),
1901 value: Cow::Borrowed(&[]),
1902 }))
1903 );
1904 assert_eq!(
1905 iter.next(),
1906 Some(Ok(Attribute {
1907 key: QName(b"regular"),
1908 value: Cow::Borrowed(b"attribute"),
1909 }))
1910 );
1911 assert_eq!(iter.next(), None);
1912 assert_eq!(iter.next(), None);
1913 }
1914
1915 /// Key is started with an invalid symbol (a single quote in this test).
1916 /// Because we do not check validity of keys and values during parsing,
1917 /// that invalid attribute will be returned
1918 #[test]
1919 fn key_start_invalid() {
1920 let mut iter = Attributes::html(r#"tag 'key'='value' regular='attribute'"#, 3);
1921
1922 assert_eq!(
1923 iter.next(),
1924 Some(Ok(Attribute {
1925 key: QName(b"'key'"),
1926 value: Cow::Borrowed(b"value"),
1927 }))
1928 );
1929 assert_eq!(
1930 iter.next(),
1931 Some(Ok(Attribute {
1932 key: QName(b"regular"),
1933 value: Cow::Borrowed(b"attribute"),
1934 }))
1935 );
1936 assert_eq!(iter.next(), None);
1937 assert_eq!(iter.next(), None);
1938 }
1939
1940 /// Key contains an invalid symbol (an ampersand in this test).
1941 /// Because we do not check validity of keys and values during parsing,
1942 /// that invalid attribute will be returned
1943 #[test]
1944 fn key_contains_invalid() {
1945 let mut iter = Attributes::html(r#"tag key&jey='value' regular='attribute'"#, 3);
1946
1947 assert_eq!(
1948 iter.next(),
1949 Some(Ok(Attribute {
1950 key: QName(b"key&jey"),
1951 value: Cow::Borrowed(b"value"),
1952 }))
1953 );
1954 assert_eq!(
1955 iter.next(),
1956 Some(Ok(Attribute {
1957 key: QName(b"regular"),
1958 value: Cow::Borrowed(b"attribute"),
1959 }))
1960 );
1961 assert_eq!(iter.next(), None);
1962 assert_eq!(iter.next(), None);
1963 }
1964
1965 /// Attribute value is missing after `=`
1966 #[test]
1967 fn missed_value() {
1968 let mut iter = Attributes::html(r#"tag key= regular='attribute'"#, 3);
1969
1970 // Because we do not check validity of keys and values during parsing,
1971 // "regular='attribute'" is considered as unquoted attribute value
1972 assert_eq!(
1973 iter.next(),
1974 Some(Ok(Attribute {
1975 key: QName(b"key"),
1976 value: Cow::Borrowed(b"regular='attribute'"),
1977 }))
1978 );
1979 assert_eq!(iter.next(), None);
1980 assert_eq!(iter.next(), None);
1981
1982 ////////////////////////////////////////////////////////////////////
1983
1984 let mut iter = Attributes::html(r#"tag key= regular= 'attribute'"#, 3);
1985
1986 // Because we do not check validity of keys and values during parsing,
1987 // "regular=" is considered as unquoted attribute value
1988 assert_eq!(
1989 iter.next(),
1990 Some(Ok(Attribute {
1991 key: QName(b"key"),
1992 value: Cow::Borrowed(b"regular="),
1993 }))
1994 );
1995 // Because we do not check validity of keys and values during parsing,
1996 // "'attribute'" is considered as key-only attribute
1997 assert_eq!(
1998 iter.next(),
1999 Some(Ok(Attribute {
2000 key: QName(b"'attribute'"),
2001 value: Cow::Borrowed(&[]),
2002 }))
2003 );
2004 assert_eq!(iter.next(), None);
2005 assert_eq!(iter.next(), None);
2006
2007 ////////////////////////////////////////////////////////////////////
2008
2009 let mut iter = Attributes::html(r#"tag key= regular ='attribute'"#, 3);
2010
2011 // Because we do not check validity of keys and values during parsing,
2012 // "regular" is considered as unquoted attribute value
2013 assert_eq!(
2014 iter.next(),
2015 Some(Ok(Attribute {
2016 key: QName(b"key"),
2017 value: Cow::Borrowed(b"regular"),
2018 }))
2019 );
2020 // Because we do not check validity of keys and values during parsing,
2021 // "='attribute'" is considered as key-only attribute
2022 assert_eq!(
2023 iter.next(),
2024 Some(Ok(Attribute {
2025 key: QName(b"='attribute'"),
2026 value: Cow::Borrowed(&[]),
2027 }))
2028 );
2029 assert_eq!(iter.next(), None);
2030 assert_eq!(iter.next(), None);
2031
2032 ////////////////////////////////////////////////////////////////////
2033
2034 let mut iter = Attributes::html(r#"tag key= regular = 'attribute'"#, 3);
2035 // 0 ^ = 9 ^ = 19 ^ = 30
2036
2037 // Because we do not check validity of keys and values during parsing,
2038 // "regular" is considered as unquoted attribute value
2039 assert_eq!(
2040 iter.next(),
2041 Some(Ok(Attribute {
2042 key: QName(b"key"),
2043 value: Cow::Borrowed(b"regular"),
2044 }))
2045 );
2046 // Because we do not check validity of keys and values during parsing,
2047 // "=" is considered as key-only attribute
2048 assert_eq!(
2049 iter.next(),
2050 Some(Ok(Attribute {
2051 key: QName(b"="),
2052 value: Cow::Borrowed(&[]),
2053 }))
2054 );
2055 // Because we do not check validity of keys and values during parsing,
2056 // "'attribute'" is considered as key-only attribute
2057 assert_eq!(
2058 iter.next(),
2059 Some(Ok(Attribute {
2060 key: QName(b"'attribute'"),
2061 value: Cow::Borrowed(&[]),
2062 }))
2063 );
2064 assert_eq!(iter.next(), None);
2065 assert_eq!(iter.next(), None);
2066 }
2067 }
2068
2069 /// Copy of single, but with additional spaces in markup
2070 mod sparsed {
2071 use super::*;
2072 use pretty_assertions::assert_eq;
2073
2074 /// Attribute have a value enclosed in single quotes
2075 #[test]
2076 fn single_quoted() {
2077 let mut iter = Attributes::html(r#"tag key = 'value' "#, 3);
2078
2079 assert_eq!(
2080 iter.next(),
2081 Some(Ok(Attribute {
2082 key: QName(b"key"),
2083 value: Cow::Borrowed(b"value"),
2084 }))
2085 );
2086 assert_eq!(iter.next(), None);
2087 assert_eq!(iter.next(), None);
2088 }
2089
2090 /// Attribute have a value enclosed in double quotes
2091 #[test]
2092 fn double_quoted() {
2093 let mut iter = Attributes::html(r#"tag key = "value" "#, 3);
2094
2095 assert_eq!(
2096 iter.next(),
2097 Some(Ok(Attribute {
2098 key: QName(b"key"),
2099 value: Cow::Borrowed(b"value"),
2100 }))
2101 );
2102 assert_eq!(iter.next(), None);
2103 assert_eq!(iter.next(), None);
2104 }
2105
2106 /// Attribute have a value, not enclosed in quotes
2107 #[test]
2108 fn unquoted() {
2109 let mut iter = Attributes::html(r#"tag key = value "#, 3);
2110
2111 assert_eq!(
2112 iter.next(),
2113 Some(Ok(Attribute {
2114 key: QName(b"key"),
2115 value: Cow::Borrowed(b"value"),
2116 }))
2117 );
2118 assert_eq!(iter.next(), None);
2119 assert_eq!(iter.next(), None);
2120 }
2121
2122 /// Only attribute key is present
2123 #[test]
2124 fn key_only() {
2125 let mut iter = Attributes::html(r#"tag key "#, 3);
2126
2127 assert_eq!(
2128 iter.next(),
2129 Some(Ok(Attribute {
2130 key: QName(b"key"),
2131 value: Cow::Borrowed(&[]),
2132 }))
2133 );
2134 assert_eq!(iter.next(), None);
2135 assert_eq!(iter.next(), None);
2136 }
2137
2138 /// Key is started with an invalid symbol (a single quote in this test).
2139 /// Because we do not check validity of keys and values during parsing,
2140 /// that invalid attribute will be returned
2141 #[test]
2142 fn key_start_invalid() {
2143 let mut iter = Attributes::html(r#"tag 'key' = 'value' "#, 3);
2144
2145 assert_eq!(
2146 iter.next(),
2147 Some(Ok(Attribute {
2148 key: QName(b"'key'"),
2149 value: Cow::Borrowed(b"value"),
2150 }))
2151 );
2152 assert_eq!(iter.next(), None);
2153 assert_eq!(iter.next(), None);
2154 }
2155
2156 /// Key contains an invalid symbol (an ampersand in this test).
2157 /// Because we do not check validity of keys and values during parsing,
2158 /// that invalid attribute will be returned
2159 #[test]
2160 fn key_contains_invalid() {
2161 let mut iter = Attributes::html(r#"tag key&jey = 'value' "#, 3);
2162
2163 assert_eq!(
2164 iter.next(),
2165 Some(Ok(Attribute {
2166 key: QName(b"key&jey"),
2167 value: Cow::Borrowed(b"value"),
2168 }))
2169 );
2170 assert_eq!(iter.next(), None);
2171 assert_eq!(iter.next(), None);
2172 }
2173
2174 /// Attribute value is missing after `=`
2175 #[test]
2176 fn missed_value() {
2177 let mut iter = Attributes::html(r#"tag key = "#, 3);
2178 // 0 ^ = 10
2179
2180 assert_eq!(iter.next(), Some(Err(AttrError::ExpectedValue(10))));
2181 assert_eq!(iter.next(), None);
2182 assert_eq!(iter.next(), None);
2183 }
2184 }
2185
2186 /// Checks that duplicated attributes correctly reported and recovering is
2187 /// possible after that
2188 mod duplicated {
2189 use super::*;
2190
2191 mod with_check {
2192 use super::*;
2193 use pretty_assertions::assert_eq;
2194
2195 /// Attribute have a value enclosed in single quotes
2196 #[test]
2197 fn single_quoted() {
2198 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2199 // 0 ^ = 4 ^ = 16
2200
2201 assert_eq!(
2202 iter.next(),
2203 Some(Ok(Attribute {
2204 key: QName(b"key"),
2205 value: Cow::Borrowed(b"value"),
2206 }))
2207 );
2208 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2209 assert_eq!(
2210 iter.next(),
2211 Some(Ok(Attribute {
2212 key: QName(b"another"),
2213 value: Cow::Borrowed(b""),
2214 }))
2215 );
2216 assert_eq!(iter.next(), None);
2217 assert_eq!(iter.next(), None);
2218 }
2219
2220 /// Attribute have a value enclosed in double quotes
2221 #[test]
2222 fn double_quoted() {
2223 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2224 // 0 ^ = 4 ^ = 16
2225
2226 assert_eq!(
2227 iter.next(),
2228 Some(Ok(Attribute {
2229 key: QName(b"key"),
2230 value: Cow::Borrowed(b"value"),
2231 }))
2232 );
2233 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2234 assert_eq!(
2235 iter.next(),
2236 Some(Ok(Attribute {
2237 key: QName(b"another"),
2238 value: Cow::Borrowed(b""),
2239 }))
2240 );
2241 assert_eq!(iter.next(), None);
2242 assert_eq!(iter.next(), None);
2243 }
2244
2245 /// Attribute have a value, not enclosed in quotes
2246 #[test]
2247 fn unquoted() {
2248 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2249 // 0 ^ = 4 ^ = 16
2250
2251 assert_eq!(
2252 iter.next(),
2253 Some(Ok(Attribute {
2254 key: QName(b"key"),
2255 value: Cow::Borrowed(b"value"),
2256 }))
2257 );
2258 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2259 assert_eq!(
2260 iter.next(),
2261 Some(Ok(Attribute {
2262 key: QName(b"another"),
2263 value: Cow::Borrowed(b""),
2264 }))
2265 );
2266 assert_eq!(iter.next(), None);
2267 assert_eq!(iter.next(), None);
2268 }
2269
2270 /// Only attribute key is present
2271 #[test]
2272 fn key_only() {
2273 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2274 // 0 ^ = 4 ^ = 16
2275
2276 assert_eq!(
2277 iter.next(),
2278 Some(Ok(Attribute {
2279 key: QName(b"key"),
2280 value: Cow::Borrowed(b"value"),
2281 }))
2282 );
2283 assert_eq!(iter.next(), Some(Err(AttrError::Duplicated(16, 4))));
2284 assert_eq!(
2285 iter.next(),
2286 Some(Ok(Attribute {
2287 key: QName(b"another"),
2288 value: Cow::Borrowed(b""),
2289 }))
2290 );
2291 assert_eq!(iter.next(), None);
2292 assert_eq!(iter.next(), None);
2293 }
2294 }
2295
2296 /// Check for duplicated names is disabled
2297 mod without_check {
2298 use super::*;
2299 use pretty_assertions::assert_eq;
2300
2301 /// Attribute have a value enclosed in single quotes
2302 #[test]
2303 fn single_quoted() {
2304 let mut iter = Attributes::html(r#"tag key='value' key='dup' another=''"#, 3);
2305 iter.with_checks(false);
2306
2307 assert_eq!(
2308 iter.next(),
2309 Some(Ok(Attribute {
2310 key: QName(b"key"),
2311 value: Cow::Borrowed(b"value"),
2312 }))
2313 );
2314 assert_eq!(
2315 iter.next(),
2316 Some(Ok(Attribute {
2317 key: QName(b"key"),
2318 value: Cow::Borrowed(b"dup"),
2319 }))
2320 );
2321 assert_eq!(
2322 iter.next(),
2323 Some(Ok(Attribute {
2324 key: QName(b"another"),
2325 value: Cow::Borrowed(b""),
2326 }))
2327 );
2328 assert_eq!(iter.next(), None);
2329 assert_eq!(iter.next(), None);
2330 }
2331
2332 /// Attribute have a value enclosed in double quotes
2333 #[test]
2334 fn double_quoted() {
2335 let mut iter = Attributes::html(r#"tag key='value' key="dup" another=''"#, 3);
2336 iter.with_checks(false);
2337
2338 assert_eq!(
2339 iter.next(),
2340 Some(Ok(Attribute {
2341 key: QName(b"key"),
2342 value: Cow::Borrowed(b"value"),
2343 }))
2344 );
2345 assert_eq!(
2346 iter.next(),
2347 Some(Ok(Attribute {
2348 key: QName(b"key"),
2349 value: Cow::Borrowed(b"dup"),
2350 }))
2351 );
2352 assert_eq!(
2353 iter.next(),
2354 Some(Ok(Attribute {
2355 key: QName(b"another"),
2356 value: Cow::Borrowed(b""),
2357 }))
2358 );
2359 assert_eq!(iter.next(), None);
2360 assert_eq!(iter.next(), None);
2361 }
2362
2363 /// Attribute have a value, not enclosed in quotes
2364 #[test]
2365 fn unquoted() {
2366 let mut iter = Attributes::html(r#"tag key='value' key=dup another=''"#, 3);
2367 iter.with_checks(false);
2368
2369 assert_eq!(
2370 iter.next(),
2371 Some(Ok(Attribute {
2372 key: QName(b"key"),
2373 value: Cow::Borrowed(b"value"),
2374 }))
2375 );
2376 assert_eq!(
2377 iter.next(),
2378 Some(Ok(Attribute {
2379 key: QName(b"key"),
2380 value: Cow::Borrowed(b"dup"),
2381 }))
2382 );
2383 assert_eq!(
2384 iter.next(),
2385 Some(Ok(Attribute {
2386 key: QName(b"another"),
2387 value: Cow::Borrowed(b""),
2388 }))
2389 );
2390 assert_eq!(iter.next(), None);
2391 assert_eq!(iter.next(), None);
2392 }
2393
2394 /// Only attribute key is present
2395 #[test]
2396 fn key_only() {
2397 let mut iter = Attributes::html(r#"tag key='value' key another=''"#, 3);
2398 iter.with_checks(false);
2399
2400 assert_eq!(
2401 iter.next(),
2402 Some(Ok(Attribute {
2403 key: QName(b"key"),
2404 value: Cow::Borrowed(b"value"),
2405 }))
2406 );
2407 assert_eq!(
2408 iter.next(),
2409 Some(Ok(Attribute {
2410 key: QName(b"key"),
2411 value: Cow::Borrowed(&[]),
2412 }))
2413 );
2414 assert_eq!(
2415 iter.next(),
2416 Some(Ok(Attribute {
2417 key: QName(b"another"),
2418 value: Cow::Borrowed(b""),
2419 }))
2420 );
2421 assert_eq!(iter.next(), None);
2422 assert_eq!(iter.next(), None);
2423 }
2424 }
2425 }
2426
/// Single- and double-quoted values are mixed within one tag, and a quote of
/// the other kind inside a value is expected to be kept as part of that value
#[test]
fn mixed_quote() {
    let mut attrs = Attributes::html(r#"tag a='a' b = "b" c='cc"cc' d="dd'dd""#, 3);

    // (key, raw value) pairs in the order they appear in the markup
    let expected: [(&[u8], &[u8]); 4] = [
        (b"a", b"a"),
        (b"b", b"b"),
        (b"c", br#"cc"cc"#),
        (b"d", b"dd'dd"),
    ];
    for &(key, value) in expected.iter() {
        assert_eq!(
            attrs.next(),
            Some(Ok(Attribute {
                key: QName(key),
                value: Cow::Borrowed(value),
            }))
        );
    }

    // Once exhausted, the iterator must keep returning `None`
    assert_eq!(attrs.next(), None);
    assert_eq!(attrs.next(), None);
}
2462}