quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, normalize_html_eols, normalize_xml_eols, parse_number, partial_escape,
53 EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
57use attributes::{AttrError, Attribute, Attributes};
58
59/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
60///
61/// The name can be accessed using the [`name`] or [`local_name`] methods.
62/// An iterator over the attributes is returned by the [`attributes`] method.
63///
64/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
65/// returns the content of this event between `<` and `>` or `/>`:
66///
67/// ```
68/// # use quick_xml::events::{BytesStart, Event};
69/// # use quick_xml::reader::Reader;
70/// # use pretty_assertions::assert_eq;
71/// // Remember, that \ at the end of string literal strips
72/// // all space characters to the first non-space character
73/// let mut reader = Reader::from_str("\
74/// <element a1 = 'val1' a2=\"val2\" />\
75/// <element a1 = 'val1' a2=\"val2\" >"
76/// );
77/// let content = "element a1 = 'val1' a2=\"val2\" ";
78/// let event = BytesStart::from_content(content, 7);
79///
80/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
81/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
82/// // deref coercion of &BytesStart to &[u8]
83/// assert_eq!(&event as &[u8], content.as_bytes());
84/// // AsRef<[u8]> for &T + deref coercion
85/// assert_eq!(event.as_ref(), content.as_bytes());
86/// ```
87///
88/// [`name`]: Self::name
89/// [`local_name`]: Self::local_name
90/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by the raw attributes),
    /// before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
100
101impl<'a> BytesStart<'a> {
102 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103 #[inline]
104 pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105 BytesStart {
106 buf: Cow::Borrowed(content),
107 name_len,
108 decoder,
109 }
110 }
111
112 /// Creates a new `BytesStart` from the given name.
113 ///
114 /// # Warning
115 ///
116 /// `name` must be a valid name.
117 #[inline]
118 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119 let buf = str_cow_to_bytes(name);
120 BytesStart {
121 name_len: buf.len(),
122 buf,
123 decoder: Decoder::utf8(),
124 }
125 }
126
127 /// Creates a new `BytesStart` from the given content (name + attributes).
128 ///
129 /// # Warning
130 ///
131 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132 /// must be correctly-formed attributes. Neither are checked, it is possible
133 /// to generate invalid XML if `content` or `name_len` are incorrect.
134 #[inline]
135 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136 BytesStart {
137 buf: str_cow_to_bytes(content),
138 name_len,
139 decoder: Decoder::utf8(),
140 }
141 }
142
143 /// Converts the event into an owned event.
144 pub fn into_owned(self) -> BytesStart<'static> {
145 BytesStart {
146 buf: Cow::Owned(self.buf.into_owned()),
147 name_len: self.name_len,
148 decoder: self.decoder,
149 }
150 }
151
152 /// Converts the event into an owned event without taking ownership of Event
153 pub fn to_owned(&self) -> BytesStart<'static> {
154 BytesStart {
155 buf: Cow::Owned(self.buf.clone().into_owned()),
156 name_len: self.name_len,
157 decoder: self.decoder,
158 }
159 }
160
161 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162 ///
163 /// # Example
164 ///
165 /// ```
166 /// use quick_xml::events::{BytesStart, Event};
167 /// # use quick_xml::writer::Writer;
168 /// # use quick_xml::Error;
169 ///
170 /// struct SomeStruct<'a> {
171 /// attrs: BytesStart<'a>,
172 /// // ...
173 /// }
174 /// # impl<'a> SomeStruct<'a> {
175 /// # fn example(&self) -> Result<(), Error> {
176 /// # let mut writer = Writer::new(Vec::new());
177 ///
178 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179 /// // ...
180 /// writer.write_event(Event::End(self.attrs.to_end()))?;
181 /// # Ok(())
182 /// # }}
183 /// ```
184 ///
185 /// [`to_end`]: Self::to_end
186 pub fn borrow(&self) -> BytesStart {
187 BytesStart {
188 buf: Cow::Borrowed(&self.buf),
189 name_len: self.name_len,
190 decoder: self.decoder,
191 }
192 }
193
194 /// Creates new paired close tag
195 #[inline]
196 pub fn to_end(&self) -> BytesEnd {
197 BytesEnd::from(self.name())
198 }
199
200 /// Get the decoder, used to decode bytes, read by the reader which produces
201 /// this event, to the strings.
202 ///
203 /// When event was created manually, encoding is UTF-8.
204 ///
205 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206 /// defaults to UTF-8.
207 ///
208 /// [`encoding`]: ../index.html#encoding
209 #[inline]
210 pub const fn decoder(&self) -> Decoder {
211 self.decoder
212 }
213
214 /// Gets the undecoded raw tag name, as present in the input stream.
215 #[inline]
216 pub fn name(&self) -> QName {
217 QName(&self.buf[..self.name_len])
218 }
219
220 /// Gets the undecoded raw local tag name (excluding namespace) as present
221 /// in the input stream.
222 ///
223 /// All content up to and including the first `:` character is removed from the tag name.
224 #[inline]
225 pub fn local_name(&self) -> LocalName {
226 self.name().into()
227 }
228
229 /// Edit the name of the BytesStart in-place
230 ///
231 /// # Warning
232 ///
233 /// `name` must be a valid name.
234 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235 let bytes = self.buf.to_mut();
236 bytes.splice(..self.name_len, name.iter().cloned());
237 self.name_len = name.len();
238 self
239 }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245 ///
246 /// The yielded items must be convertible to [`Attribute`] using `Into`.
247 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248 where
249 I: IntoIterator,
250 I::Item: Into<Attribute<'b>>,
251 {
252 self.extend_attributes(attributes);
253 self
254 }
255
256 /// Add additional attributes to this tag using an iterator.
257 ///
258 /// The yielded items must be convertible to [`Attribute`] using `Into`.
259 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260 where
261 I: IntoIterator,
262 I::Item: Into<Attribute<'b>>,
263 {
264 for attr in attributes {
265 self.push_attribute(attr);
266 }
267 self
268 }
269
270 /// Adds an attribute to this element.
271 pub fn push_attribute<'b, A>(&mut self, attr: A)
272 where
273 A: Into<Attribute<'b>>,
274 {
275 self.buf.to_mut().push(b' ');
276 self.push_attr(attr.into());
277 }
278
279 /// Remove all attributes from the ByteStart
280 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281 self.buf.to_mut().truncate(self.name_len);
282 self
283 }
284
285 /// Returns an iterator over the attributes of this tag.
286 pub fn attributes(&self) -> Attributes {
287 Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288 }
289
290 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291 pub fn html_attributes(&self) -> Attributes {
292 Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293 }
294
295 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296 /// including the whitespace after the tag name if there is any.
297 #[inline]
298 pub fn attributes_raw(&self) -> &[u8] {
299 &self.buf[self.name_len..]
300 }
301
302 /// Try to get an attribute
303 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304 &'a self,
305 attr_name: N,
306 ) -> Result<Option<Attribute<'a>>, AttrError> {
307 for a in self.attributes().with_checks(false) {
308 let a = a?;
309 if a.key.as_ref() == attr_name.as_ref() {
310 return Ok(Some(a));
311 }
312 }
313 Ok(None)
314 }
315
316 /// Adds an attribute to this element.
317 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318 let bytes = self.buf.to_mut();
319 bytes.extend_from_slice(attr.key.as_ref());
320 bytes.extend_from_slice(b"=\"");
321 // FIXME: need to escape attribute content
322 bytes.extend_from_slice(attr.value.as_ref());
323 bytes.push(b'"');
324 }
325
326 /// Adds new line in existing element
327 pub(crate) fn push_newline(&mut self) {
328 self.buf.to_mut().push(b'\n');
329 }
330
331 /// Adds indentation bytes in existing element
332 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333 self.buf.to_mut().extend_from_slice(indent);
334 }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339 write!(f, "BytesStart {{ buf: ")?;
340 write_cow_string(f, &self.buf)?;
341 write!(f, ", name_len: {} }}", self.name_len)
342 }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346 type Target = [u8];
347
348 fn deref(&self) -> &[u8] {
349 &self.buf
350 }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty alphanumeric name and arbitrary
    /// `(key, value)` attributes. Any other generated name is rejected.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `Vec` is already `IntoIterator`, no explicit conversion is needed
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Reports the size hint of the `&str` used for the name.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
372/// Closing tag data (`Event::End`): `</name>`.
373///
374/// The name can be accessed using the [`name`] or [`local_name`] methods.
375///
376/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
377/// returns the content of this event between `</` and `>`.
378///
379/// Note, that inner text will not contain `>` character inside:
380///
381/// ```
382/// # use quick_xml::events::{BytesEnd, Event};
383/// # use quick_xml::reader::Reader;
384/// # use pretty_assertions::assert_eq;
385/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
386/// // Note, that this entire string considered as a .name()
387/// let content = "element a1 = 'val1' a2=\"val2\" ";
388/// let event = BytesEnd::new(content);
389///
390/// reader.config_mut().trim_markup_names_in_closing_tags = false;
391/// reader.config_mut().check_end_names = false;
392/// reader.read_event().unwrap(); // Skip `<element>`
393///
394/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
395/// assert_eq!(event.name().as_ref(), content.as_bytes());
396/// // deref coercion of &BytesEnd to &[u8]
397/// assert_eq!(&event as &[u8], content.as_bytes());
398/// // AsRef<[u8]> for &T + deref coercion
399/// assert_eq!(event.as_ref(), content.as_bytes());
400/// ```
401///
402/// [`name`]: Self::name
403/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the tag name
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411 #[inline]
412 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413 BytesEnd { name }
414 }
415
416 /// Creates a new `BytesEnd` borrowing a slice.
417 ///
418 /// # Warning
419 ///
420 /// `name` must be a valid name.
421 #[inline]
422 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423 Self::wrap(str_cow_to_bytes(name))
424 }
425
426 /// Converts the event into an owned event.
427 pub fn into_owned(self) -> BytesEnd<'static> {
428 BytesEnd {
429 name: Cow::Owned(self.name.into_owned()),
430 }
431 }
432
433 /// Converts the event into a borrowed event.
434 #[inline]
435 pub fn borrow(&self) -> BytesEnd {
436 BytesEnd {
437 name: Cow::Borrowed(&self.name),
438 }
439 }
440
441 /// Gets the undecoded raw tag name, as present in the input stream.
442 #[inline]
443 pub fn name(&self) -> QName {
444 QName(&self.name)
445 }
446
447 /// Gets the undecoded raw local tag name (excluding namespace) as present
448 /// in the input stream.
449 ///
450 /// All content up to and including the first `:` character is removed from the tag name.
451 #[inline]
452 pub fn local_name(&self) -> LocalName {
453 self.name().into()
454 }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459 write!(f, "BytesEnd {{ name: ")?;
460 write_cow_string(f, &self.name)?;
461 write!(f, " }}")
462 }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466 type Target = [u8];
467
468 fn deref(&self) -> &[u8] {
469 &self.name
470 }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474 #[inline]
475 fn from(name: QName<'a>) -> Self {
476 Self::wrap(name.into_inner().into())
477 }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag whose name is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Reports the size hint of the `&str` used for the name.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is also stored in escaped form.
494///
495/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
496/// returns the content of this event. In case of comment this is everything
497/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
498/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
499/// (i.e. in case of DTD the first character is never space):
500///
501/// ```
502/// # use quick_xml::events::{BytesText, Event};
503/// # use quick_xml::reader::Reader;
504/// # use pretty_assertions::assert_eq;
505/// // Remember, that \ at the end of string literal strips
506/// // all space characters to the first non-space character
507/// let mut reader = Reader::from_str("\
508/// <!DOCTYPE comment or text >\
509/// comment or text \
510/// <!--comment or text -->"
511/// );
512/// let content = "comment or text ";
513/// let event = BytesText::new(content);
514///
515/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
516/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
518/// // deref coercion of &BytesText to &[u8]
519/// assert_eq!(&event as &[u8], content.as_bytes());
520/// // AsRef<[u8]> for &T + deref coercion
521/// assert_eq!(event.as_ref(), content.as_bytes());
522/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
532
533impl<'a> BytesText<'a> {
534 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
535 #[inline]
536 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
537 Self {
538 content: content.into(),
539 decoder,
540 }
541 }
542
543 /// Creates a new `BytesText` from an escaped string.
544 #[inline]
545 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
546 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
547 }
548
549 /// Creates a new `BytesText` from a string. The string is expected not to
550 /// be escaped.
551 #[inline]
552 pub fn new(content: &'a str) -> Self {
553 Self::from_escaped(escape(content))
554 }
555
556 /// Ensures that all data is owned to extend the object's lifetime if
557 /// necessary.
558 #[inline]
559 pub fn into_owned(self) -> BytesText<'static> {
560 BytesText {
561 content: self.content.into_owned().into(),
562 decoder: self.decoder,
563 }
564 }
565
566 /// Extracts the inner `Cow` from the `BytesText` event container.
567 #[inline]
568 pub fn into_inner(self) -> Cow<'a, [u8]> {
569 self.content
570 }
571
572 /// Converts the event into a borrowed event.
573 #[inline]
574 pub fn borrow(&self) -> BytesText {
575 BytesText {
576 content: Cow::Borrowed(&self.content),
577 decoder: self.decoder,
578 }
579 }
580
581 /// Decodes the content of the event.
582 ///
583 /// This will allocate if the value contains any escape sequences or in
584 /// non-UTF-8 encoding.
585 ///
586 /// This method does not normalizes end-of-line characters as required by [specification].
587 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
588 ///
589 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
590 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
591 self.decoder.decode_cow(&self.content)
592 }
593
594 /// Decodes the content of the XML event.
595 ///
596 /// When this event produced by the reader, it uses the encoding information
597 /// associated with that reader to interpret the raw bytes contained within
598 /// this text event.
599 ///
600 /// This will allocate if the value contains any escape sequences or in non-UTF-8
601 /// encoding, or EOL normalization is required.
602 ///
603 /// Note, that this method should be used only if event represents XML content,
604 /// because rules for normalizing EOLs for [XML] and [HTML] differs.
605 ///
606 /// To get HTML content use [`html_content()`](Self::html_content).
607 ///
608 /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
609 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
610 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
611 self.decoder.content(&self.content, normalize_xml_eols)
612 }
613
614 /// Decodes the content of the HTML event.
615 ///
616 /// When this event produced by the reader, it uses the encoding information
617 /// associated with that reader to interpret the raw bytes contained within
618 /// this text event.
619 ///
620 /// This will allocate if the value contains any escape sequences or in non-UTF-8
621 /// encoding, or EOL normalization is required.
622 ///
623 /// Note, that this method should be used only if event represents HTML content,
624 /// because rules for normalizing EOLs for [XML] and [HTML] differs.
625 ///
626 /// To get XML content use [`xml_content()`](Self::xml_content).
627 ///
628 /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
629 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
630 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
631 self.decoder.content(&self.content, normalize_html_eols)
632 }
633
634 /// Removes leading XML whitespace bytes from text content.
635 ///
636 /// Returns `true` if content is empty after that
637 pub fn inplace_trim_start(&mut self) -> bool {
638 self.content = trim_cow(
639 replace(&mut self.content, Cow::Borrowed(b"")),
640 trim_xml_start,
641 );
642 self.content.is_empty()
643 }
644
645 /// Removes trailing XML whitespace bytes from text content.
646 ///
647 /// Returns `true` if content is empty after that
648 pub fn inplace_trim_end(&mut self) -> bool {
649 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
650 self.content.is_empty()
651 }
652}
653
654impl<'a> Debug for BytesText<'a> {
655 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
656 write!(f, "BytesText {{ content: ")?;
657 write_cow_string(f, &self.content)?;
658 write!(f, " }}")
659 }
660}
661
662impl<'a> Deref for BytesText<'a> {
663 type Target = [u8];
664
665 fn deref(&self) -> &[u8] {
666 &self.content
667 }
668}
669
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from an arbitrary string consisting only of
    /// alphanumeric characters (possibly empty). Any other string is rejected.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Reports the size hint of the `&str` used for the content.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
684
685////////////////////////////////////////////////////////////////////////////////////////////////////
686
687/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
688/// [convert](Self::escape) it to [`BytesText`].
689///
690/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
691/// returns the content of this event between `<![CDATA[` and `]]>`.
692///
693/// Note, that inner text will not contain `]]>` sequence inside:
694///
695/// ```
696/// # use quick_xml::events::{BytesCData, Event};
697/// # use quick_xml::reader::Reader;
698/// # use pretty_assertions::assert_eq;
699/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
700/// let content = " CDATA section ";
701/// let event = BytesCData::new(content);
702///
703/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
704/// // deref coercion of &BytesCData to &[u8]
705/// assert_eq!(&event as &[u8], content.as_bytes());
706/// // AsRef<[u8]> for &T + deref coercion
707/// assert_eq!(event.as_ref(), content.as_bytes());
708/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw bytes of the CDATA section, stored as is (not escaped)
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
715
716impl<'a> BytesCData<'a> {
717 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
718 #[inline]
719 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
720 Self {
721 content: content.into(),
722 decoder,
723 }
724 }
725
726 /// Creates a new `BytesCData` from a string.
727 ///
728 /// # Warning
729 ///
730 /// `content` must not contain the `]]>` sequence. You can use
731 /// [`BytesCData::escaped`] to escape the content instead.
732 #[inline]
733 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
734 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
735 }
736
737 /// Creates an iterator of `BytesCData` from a string.
738 ///
739 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
740 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
741 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
742 /// for each of those sections.
743 ///
744 /// # Examples
745 ///
746 /// ```
747 /// # use quick_xml::events::BytesCData;
748 /// # use pretty_assertions::assert_eq;
749 /// let content = "";
750 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
751 /// assert_eq!(cdata, &[BytesCData::new("")]);
752 ///
753 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
754 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
755 /// assert_eq!(cdata, &[
756 /// BytesCData::new("Certain tokens like ]]"),
757 /// BytesCData::new("> can be difficult and <invalid>"),
758 /// ]);
759 ///
760 /// let content = "foo]]>bar]]>baz]]>quux";
761 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
762 /// assert_eq!(cdata, &[
763 /// BytesCData::new("foo]]"),
764 /// BytesCData::new(">bar]]"),
765 /// BytesCData::new(">baz]]"),
766 /// BytesCData::new(">quux"),
767 /// ]);
768 /// ```
769 #[inline]
770 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
771 CDataIterator {
772 unprocessed: content.as_bytes(),
773 finished: false,
774 }
775 }
776
777 /// Ensures that all data is owned to extend the object's lifetime if
778 /// necessary.
779 #[inline]
780 pub fn into_owned(self) -> BytesCData<'static> {
781 BytesCData {
782 content: self.content.into_owned().into(),
783 decoder: self.decoder,
784 }
785 }
786
787 /// Extracts the inner `Cow` from the `BytesCData` event container.
788 #[inline]
789 pub fn into_inner(self) -> Cow<'a, [u8]> {
790 self.content
791 }
792
793 /// Converts the event into a borrowed event.
794 #[inline]
795 pub fn borrow(&self) -> BytesCData {
796 BytesCData {
797 content: Cow::Borrowed(&self.content),
798 decoder: self.decoder,
799 }
800 }
801
802 /// Converts this CDATA content to an escaped version, that can be written
803 /// as an usual text in XML.
804 ///
805 /// This function performs following replacements:
806 ///
807 /// | Character | Replacement
808 /// |-----------|------------
809 /// | `<` | `<`
810 /// | `>` | `>`
811 /// | `&` | `&`
812 /// | `'` | `'`
813 /// | `"` | `"`
814 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
815 let decoded = self.decode()?;
816 Ok(BytesText::wrap(
817 match escape(decoded) {
818 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
819 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
820 },
821 Decoder::utf8(),
822 ))
823 }
824
825 /// Converts this CDATA content to an escaped version, that can be written
826 /// as an usual text in XML.
827 ///
828 /// In XML text content, it is allowed (though not recommended) to leave
829 /// the quote special characters `"` and `'` unescaped.
830 ///
831 /// This function performs following replacements:
832 ///
833 /// | Character | Replacement
834 /// |-----------|------------
835 /// | `<` | `<`
836 /// | `>` | `>`
837 /// | `&` | `&`
838 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
839 let decoded = self.decode()?;
840 Ok(BytesText::wrap(
841 match partial_escape(decoded) {
842 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
843 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
844 },
845 Decoder::utf8(),
846 ))
847 }
848
849 /// Converts this CDATA content to an escaped version, that can be written
850 /// as an usual text in XML. This method escapes only those characters that
851 /// must be escaped according to the [specification].
852 ///
853 /// This function performs following replacements:
854 ///
855 /// | Character | Replacement
856 /// |-----------|------------
857 /// | `<` | `<`
858 /// | `&` | `&`
859 ///
860 /// [specification]: https://www.w3.org/TR/xml11/#syntax
861 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
862 let decoded = self.decode()?;
863 Ok(BytesText::wrap(
864 match minimal_escape(decoded) {
865 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
866 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
867 },
868 Decoder::utf8(),
869 ))
870 }
871
872 /// Decodes the raw input byte content of the CDATA section into a string,
873 /// without performing XML entity escaping.
874 ///
875 /// When this event produced by the XML reader, it uses the encoding information
876 /// associated with that reader to interpret the raw bytes contained within this
877 /// CDATA event.
878 ///
879 /// This method does not normalizes end-of-line characters as required by [specification].
880 /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
881 ///
882 /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
883 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
884 self.decoder.decode_cow(&self.content)
885 }
886
887 /// Decodes the raw input byte content of the CDATA section of the XML event
888 /// into a string.
889 ///
890 /// When this event produced by the reader, it uses the encoding information
891 /// associated with that reader to interpret the raw bytes contained within
892 /// this CDATA event.
893 ///
894 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
895 /// is required.
896 ///
897 /// Note, that this method should be used only if event represents XML content,
898 /// because rules for normalizing EOLs for [XML] and [HTML] differs.
899 ///
900 /// To get HTML content use [`html_content()`](Self::html_content).
901 ///
902 /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
903 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
904 pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
905 self.decoder.content(&self.content, normalize_xml_eols)
906 }
907
908 /// Decodes the raw input byte content of the CDATA section of the HTML event
909 /// into a string.
910 ///
911 /// When this event produced by the reader, it uses the encoding information
912 /// associated with that reader to interpret the raw bytes contained within
913 /// this CDATA event.
914 ///
915 /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
916 /// is required.
917 ///
918 /// Note, that this method should be used only if event represents HTML content,
919 /// because rules for normalizing EOLs for [XML] and [HTML] differs.
920 ///
921 /// To get XML content use [`xml_content()`](Self::xml_content).
922 ///
923 /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
924 /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
925 pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
926 self.decoder.content(&self.content, normalize_html_eols)
927 }
928}
929
930impl<'a> Debug for BytesCData<'a> {
931 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
932 write!(f, "BytesCData {{ content: ")?;
933 write_cow_string(f, &self.content)?;
934 write!(f, " }}")
935 }
936}
937
938impl<'a> Deref for BytesCData<'a> {
939 type Target = [u8];
940
941 fn deref(&self) -> &[u8] {
942 &self.content
943 }
944}
945
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event whose content is an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Reports the size hint of the `&str` used for the content.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
955
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the final chunk (everything left in `unprocessed`) has been
    /// emitted; every later call to `next` returns `None`.
    finished: bool,
}
966
967impl<'a> Debug for CDataIterator<'a> {
968 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
969 f.debug_struct("CDataIterator")
970 .field("unprocessed", &Bytes(self.unprocessed))
971 .field("finished", &self.finished)
972 .finish()
973 }
974}
975
976impl<'a> Iterator for CDataIterator<'a> {
977 type Item = BytesCData<'a>;
978
979 fn next(&mut self) -> Option<BytesCData<'a>> {
980 if self.finished {
981 return None;
982 }
983
984 for gt in memchr::memchr_iter(b'>', self.unprocessed) {
985 if self.unprocessed[..gt].ends_with(b"]]") {
986 let (slice, rest) = self.unprocessed.split_at(gt);
987 self.unprocessed = rest;
988 return Some(BytesCData::wrap(slice, Decoder::utf8()));
989 }
990 }
991
992 self.finished = true;
993 Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
994 }
995}
996
// `next` keeps returning `None` once `finished` is set, and nothing resets
// that flag, so the iterator can be marked as fused.
impl FusedIterator for CDataIterator<'_> {}
998
999////////////////////////////////////////////////////////////////////////////////////////////////////
1000
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// The whole instruction (target followed by its content), kept in a
    /// `BytesStart`: `target()` reads its name part and `content()` reads
    /// its raw attributes part.
    content: BytesStart<'a>,
}
1028
1029impl<'a> BytesPI<'a> {
    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
    ///
    /// All three arguments are forwarded unchanged to `BytesStart::wrap`.
    /// `target_len` is presumably the length in bytes of the target at the
    /// start of `content` (TODO confirm against the reader).
    #[inline]
    pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
        Self {
            content: BytesStart::wrap(content, target_len, decoder),
        }
    }
1037
1038 /// Creates a new `BytesPI` from a string.
1039 ///
1040 /// # Warning
1041 ///
1042 /// `content` must not contain the `?>` sequence.
1043 #[inline]
1044 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1045 let buf = str_cow_to_bytes(content);
1046 let name_len = name_len(&buf);
1047 Self {
1048 content: BytesStart {
1049 buf,
1050 name_len,
1051 decoder: Decoder::utf8(),
1052 },
1053 }
1054 }
1055
1056 /// Ensures that all data is owned to extend the object's lifetime if
1057 /// necessary.
1058 #[inline]
1059 pub fn into_owned(self) -> BytesPI<'static> {
1060 BytesPI {
1061 content: self.content.into_owned().into(),
1062 }
1063 }
1064
    /// Extracts the inner `Cow` from the `BytesPI` event container.
    ///
    /// The returned bytes are the whole underlying buffer, i.e. the target
    /// together with the content that follows it.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content.buf
    }
1070
1071 /// Converts the event into a borrowed event.
1072 #[inline]
1073 pub fn borrow(&self) -> BytesPI {
1074 BytesPI {
1075 content: self.content.borrow(),
1076 }
1077 }
1078
1079 /// A target used to identify the application to which the instruction is directed.
1080 ///
1081 /// # Example
1082 ///
1083 /// ```
1084 /// # use pretty_assertions::assert_eq;
1085 /// use quick_xml::events::BytesPI;
1086 ///
1087 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1088 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1089 /// ```
1090 #[inline]
1091 pub fn target(&self) -> &[u8] {
1092 self.content.name().0
1093 }
1094
    /// Content of the processing instruction. Contains everything between target
    /// name and the end of the instruction.
    ///
    /// The bytes are returned as-is, without trimming: when anything follows the
    /// target, the returned slice starts with the whitespace that separates it
    /// from the target (see the example below). An instruction that consists only
    /// of a target is expected to yield an empty slice.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::BytesPI;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// assert_eq!(instruction.content(), br#" href="style.css""#);
    /// ```
    #[inline]
    pub fn content(&self) -> &[u8] {
        // The content is stored as the raw "attributes" of the inner `BytesStart`.
        self.content.attributes_raw()
    }
1112
1113 /// A view of the processing instructions' content as a list of key-value pairs.
1114 ///
1115 /// Key-value pairs are used in some processing instructions, for example in
1116 /// `<?xml-stylesheet?>`.
1117 ///
1118 /// Returned iterator does not validate attribute values as may required by
1119 /// target's rules. For example, it doesn't check that substring `?>` is not
1120 /// present in the attribute value. That shouldn't be the problem when event
1121 /// is produced by the reader, because reader detects end of processing instruction
1122 /// by the first `?>` sequence, as required by the specification, and therefore
1123 /// this sequence cannot appear inside it.
1124 ///
1125 /// # Example
1126 ///
1127 /// ```
1128 /// # use pretty_assertions::assert_eq;
1129 /// use std::borrow::Cow;
1130 /// use quick_xml::events::attributes::Attribute;
1131 /// use quick_xml::events::BytesPI;
1132 /// use quick_xml::name::QName;
1133 ///
1134 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1135 /// for attr in instruction.attributes() {
1136 /// assert_eq!(attr, Ok(Attribute {
1137 /// key: QName(b"href"),
1138 /// value: Cow::Borrowed(b"style.css"),
1139 /// }));
1140 /// }
1141 /// ```
1142 #[inline]
1143 pub fn attributes(&self) -> Attributes {
1144 self.content.attributes()
1145 }
1146}
1147
1148impl<'a> Debug for BytesPI<'a> {
1149 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1150 write!(f, "BytesPI {{ content: ")?;
1151 write_cow_string(f, &self.content.buf)?;
1152 write!(f, " }}")
1153 }
1154}
1155
1156impl<'a> Deref for BytesPI<'a> {
1157 type Target = [u8];
1158
1159 fn deref(&self) -> &[u8] {
1160 &self.content
1161 }
1162}
1163
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds a processing instruction from an arbitrary string slice taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Reuses the size hint of `&str`, the only value consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1173
1174////////////////////////////////////////////////////////////////////////////////////////////////////
1175
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
/// let content = "xml version = '1.0' ";
/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
///
/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
/// // deref coercion of &BytesDecl to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// The declaration stored as a `BytesStart`; `version`, `encoding` and
    /// `standalone` are looked up through its attribute API.
    content: BytesStart<'a>,
}
1203
1204impl<'a> BytesDecl<'a> {
1205 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1206 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1207 /// attribute.
1208 ///
1209 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1210 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1211 /// the double quote character is not allowed in any of the attribute values.
1212 pub fn new(
1213 version: &str,
1214 encoding: Option<&str>,
1215 standalone: Option<&str>,
1216 ) -> BytesDecl<'static> {
1217 // Compute length of the buffer based on supplied attributes
1218 // ' encoding=""' => 12
1219 let encoding_attr_len = if let Some(xs) = encoding {
1220 12 + xs.len()
1221 } else {
1222 0
1223 };
1224 // ' standalone=""' => 14
1225 let standalone_attr_len = if let Some(xs) = standalone {
1226 14 + xs.len()
1227 } else {
1228 0
1229 };
1230 // 'xml version=""' => 14
1231 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1232
1233 buf.push_str("xml version=\"");
1234 buf.push_str(version);
1235
1236 if let Some(encoding_val) = encoding {
1237 buf.push_str("\" encoding=\"");
1238 buf.push_str(encoding_val);
1239 }
1240
1241 if let Some(standalone_val) = standalone {
1242 buf.push_str("\" standalone=\"");
1243 buf.push_str(standalone_val);
1244 }
1245 buf.push('"');
1246
1247 BytesDecl {
1248 content: BytesStart::from_content(buf, 3),
1249 }
1250 }
1251
    /// Creates a `BytesDecl` from a `BytesStart`.
    ///
    /// The `start` event is stored as-is; no validation is performed here.
    pub const fn from_start(start: BytesStart<'a>) -> Self {
        Self { content: start }
    }
1256
1257 /// Gets xml version, excluding quotes (`'` or `"`).
1258 ///
1259 /// According to the [grammar], the version *must* be the first thing in the declaration.
1260 /// This method tries to extract the first thing in the declaration and return it.
1261 /// In case of multiple attributes value of the first one is returned.
1262 ///
1263 /// If version is missed in the declaration, or the first thing is not a version,
1264 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1265 ///
1266 /// # Examples
1267 ///
1268 /// ```
1269 /// use quick_xml::errors::{Error, IllFormedError};
1270 /// use quick_xml::events::{BytesDecl, BytesStart};
1271 ///
1272 /// // <?xml version='1.1'?>
1273 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1274 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1275 ///
1276 /// // <?xml version='1.0' version='1.1'?>
1277 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1278 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1279 ///
1280 /// // <?xml encoding='utf-8'?>
1281 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1282 /// match decl.version() {
1283 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1284 /// _ => assert!(false),
1285 /// }
1286 ///
1287 /// // <?xml encoding='utf-8' version='1.1'?>
1288 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1289 /// match decl.version() {
1290 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1291 /// _ => assert!(false),
1292 /// }
1293 ///
1294 /// // <?xml?>
1295 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1296 /// match decl.version() {
1297 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1298 /// _ => assert!(false),
1299 /// }
1300 /// ```
1301 ///
1302 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1303 pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1304 // The version *must* be the first thing in the declaration.
1305 match self.content.attributes().with_checks(false).next() {
1306 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1307 // first attribute was not "version"
1308 Some(Ok(a)) => {
1309 let found = from_utf8(a.key.as_ref())
1310 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1311 .to_string();
1312 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1313 found,
1314 ))))
1315 }
1316 // error parsing attributes
1317 Some(Err(e)) => Err(e.into()),
1318 // no attributes
1319 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1320 }
1321 }
1322
1323 /// Gets xml encoding, excluding quotes (`'` or `"`).
1324 ///
1325 /// Although according to the [grammar] encoding must appear before `"standalone"`
1326 /// and after `"version"`, this method does not check that. The first occurrence
1327 /// of the attribute will be returned even if there are several. Also, method does
1328 /// not restrict symbols that can forming the encoding, so the returned encoding
1329 /// name may not correspond to the grammar.
1330 ///
1331 /// # Examples
1332 ///
1333 /// ```
1334 /// use std::borrow::Cow;
1335 /// use quick_xml::Error;
1336 /// use quick_xml::events::{BytesDecl, BytesStart};
1337 ///
1338 /// // <?xml version='1.1'?>
1339 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1340 /// assert!(decl.encoding().is_none());
1341 ///
1342 /// // <?xml encoding='utf-8'?>
1343 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1344 /// match decl.encoding() {
1345 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1346 /// _ => assert!(false),
1347 /// }
1348 ///
1349 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1350 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1351 /// match decl.encoding() {
1352 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1353 /// _ => assert!(false),
1354 /// }
1355 /// ```
1356 ///
1357 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1358 pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1359 self.content
1360 .try_get_attribute("encoding")
1361 .map(|a| a.map(|a| a.value))
1362 .transpose()
1363 }
1364
1365 /// Gets xml standalone, excluding quotes (`'` or `"`).
1366 ///
1367 /// Although according to the [grammar] standalone flag must appear after `"version"`
1368 /// and `"encoding"`, this method does not check that. The first occurrence of the
1369 /// attribute will be returned even if there are several. Also, method does not
1370 /// restrict symbols that can forming the value, so the returned flag name may not
1371 /// correspond to the grammar.
1372 ///
1373 /// # Examples
1374 ///
1375 /// ```
1376 /// use std::borrow::Cow;
1377 /// use quick_xml::Error;
1378 /// use quick_xml::events::{BytesDecl, BytesStart};
1379 ///
1380 /// // <?xml version='1.1'?>
1381 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1382 /// assert!(decl.standalone().is_none());
1383 ///
1384 /// // <?xml standalone='yes'?>
1385 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1386 /// match decl.standalone() {
1387 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1388 /// _ => assert!(false),
1389 /// }
1390 ///
1391 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1392 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1393 /// match decl.standalone() {
1394 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1395 /// _ => assert!(false),
1396 /// }
1397 /// ```
1398 ///
1399 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1400 pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1401 self.content
1402 .try_get_attribute("standalone")
1403 .map(|a| a.map(|a| a.value))
1404 .transpose()
1405 }
1406
    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
    /// algorithm.
    ///
    /// If the encoding is not known, or the `encoding` key was not found, returns `None`.
    /// `None` is also returned when the `encoding` attribute cannot be read.
    /// In case of a duplicated `encoding` key, the encoding corresponding to the first
    /// one is returned.
    #[cfg(feature = "encoding")]
    pub fn encoder(&self) -> Option<&'static Encoding> {
        self.encoding()
            // an attribute error is treated the same as a missing attribute
            .and_then(|e| e.ok())
            .and_then(|e| Encoding::for_label(&e))
    }
1419
1420 /// Converts the event into an owned event.
1421 pub fn into_owned(self) -> BytesDecl<'static> {
1422 BytesDecl {
1423 content: self.content.into_owned(),
1424 }
1425 }
1426
1427 /// Converts the event into a borrowed event.
1428 #[inline]
1429 pub fn borrow(&self) -> BytesDecl {
1430 BytesDecl {
1431 content: self.content.borrow(),
1432 }
1433 }
1434}
1435
1436impl<'a> Deref for BytesDecl<'a> {
1437 type Target = [u8];
1438
1439 fn deref(&self) -> &[u8] {
1440 &self.content
1441 }
1442}
1443
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string and optional
    /// encoding and standalone strings, all taken from `u` in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    /// Reports the size hint of a single `&str`.
    ///
    /// NOTE(review): `arbitrary` also consumes two `Option<&str>` values, so
    /// this is only a loose bound for the data actually used.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1458
1459////////////////////////////////////////////////////////////////////////////////////////////////////
1460
/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `&` and `;`:
///
/// ```
/// # use quick_xml::events::{BytesRef, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"&entity;"#);
/// let content = "entity";
/// let event = BytesRef::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
/// // deref coercion of &BytesRef to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Raw bytes of the reference, without the surrounding `&` and `;`.
    /// A leading `#` marks a character reference.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1486
1487impl<'a> BytesRef<'a> {
    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding.
    ///
    /// The bytes are borrowed, not copied, and `decoder` is stored so that
    /// they can be decoded later.
    #[inline]
    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
        Self {
            content: Cow::Borrowed(content),
            decoder,
        }
    }
1496
    /// Creates a new `BytesRef` from the name of the reference, i.e. the text
    /// between `&` and `;`.
    ///
    /// Both borrowed and owned strings are accepted; a borrowed string is not
    /// copied. The content is marked as UTF-8.
    ///
    /// # Warning
    ///
    /// `name` must be a valid name.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
        Self {
            content: str_cow_to_bytes(name),
            decoder: Decoder::utf8(),
        }
    }
1509
1510 /// Converts the event into an owned event.
1511 pub fn into_owned(self) -> BytesRef<'static> {
1512 BytesRef {
1513 content: Cow::Owned(self.content.into_owned()),
1514 decoder: self.decoder,
1515 }
1516 }
1517
    /// Extracts the inner `Cow` from the `BytesRef` event container.
    ///
    /// The decoder stored in the event is dropped; only the raw bytes are returned.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content
    }
1523
1524 /// Converts the event into a borrowed event.
1525 #[inline]
1526 pub fn borrow(&self) -> BytesRef {
1527 BytesRef {
1528 content: Cow::Borrowed(&self.content),
1529 decoder: self.decoder,
1530 }
1531 }
1532
    /// Decodes the content of the event.
    ///
    /// Only decoding with the decoder stored in the event is performed.
    /// This may allocate, for example if the value is in a non-UTF-8 encoding.
    ///
    /// This method does not normalize end-of-line characters as required by the [specification].
    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
    ///
    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.decode_cow(&self.content)
    }
1545
    /// Decodes the content of the XML event.
    ///
    /// When this event is produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this general reference event.
    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or if EOL
    /// normalization is required.
    ///
    /// Note that this method should be used only if the event represents XML content,
    /// because the rules for normalizing EOLs for [XML] and [HTML] differ.
    ///
    /// To get HTML content use [`html_content()`](Self::html_content).
    ///
    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_xml_eols)
    }
1565
    /// Decodes the content of the HTML event.
    ///
    /// When this event is produced by the reader, it uses the encoding information
    /// associated with that reader to interpret the raw bytes contained within
    /// this general reference event.
    ///
    /// This will allocate if the value is in a non-UTF-8 encoding, or if EOL
    /// normalization is required.
    ///
    /// Note that this method should be used only if the event represents HTML content,
    /// because the rules for normalizing EOLs for [XML] and [HTML] differ.
    ///
    /// To get XML content use [`xml_content()`](Self::xml_content).
    ///
    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
        self.decoder.content(&self.content, normalize_html_eols)
    }
1585
1586 /// Returns `true` if the specified reference represents the character reference
1587 /// (`&#<number>;`).
1588 ///
1589 /// ```
1590 /// # use quick_xml::events::BytesRef;
1591 /// # use pretty_assertions::assert_eq;
1592 /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1593 /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1594 /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false);
1595 /// ```
1596 pub fn is_char_ref(&self) -> bool {
1597 matches!(self.content.first(), Some(b'#'))
1598 }
1599
1600 /// If this reference represents character reference, then resolves it and
1601 /// returns the character, otherwise returns `None`.
1602 ///
1603 /// This method does not check if character is allowed for XML, in other words,
1604 /// well-formedness constraint [WFC: Legal Char] is not enforced.
1605 /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1606 ///
1607 /// ```
1608 /// # use quick_xml::events::BytesRef;
1609 /// # use pretty_assertions::assert_eq;
1610 /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1611 /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1612 /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None);
1613 /// ```
1614 ///
1615 /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1616 pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1617 if let Some(num) = self.decode()?.strip_prefix('#') {
1618 let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1619 return Ok(Some(ch));
1620 }
1621 Ok(None)
1622 }
1623}
1624
1625impl<'a> Debug for BytesRef<'a> {
1626 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1627 write!(f, "BytesRef {{ content: ")?;
1628 write_cow_string(f, &self.content)?;
1629 write!(f, " }}")
1630 }
1631}
1632
1633impl<'a> Deref for BytesRef<'a> {
1634 type Target = [u8];
1635
1636 fn deref(&self) -> &[u8] {
1637 &self.content
1638 }
1639}
1640
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds a reference event from an arbitrary string slice taken from `u`.
    /// The string is not checked to be a valid name.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Reuses the size hint of `&str`, the only value consumed by `arbitrary`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1651
1652////////////////////////////////////////////////////////////////////////////////////////////////////
1653
/// Event emitted by [`Reader::read_event_into`].
///
/// Every variant except [`Eof`](Self::Eof) carries a payload that borrows from, or
/// owns, its bytes. `Event` dereferences to those bytes, and to an empty
/// slice for `Eof`.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
1684
1685impl<'a> Event<'a> {
1686 /// Converts the event to an owned version, untied to the lifetime of
1687 /// buffer used when reading but incurring a new, separate allocation.
1688 pub fn into_owned(self) -> Event<'static> {
1689 match self {
1690 Event::Start(e) => Event::Start(e.into_owned()),
1691 Event::End(e) => Event::End(e.into_owned()),
1692 Event::Empty(e) => Event::Empty(e.into_owned()),
1693 Event::Text(e) => Event::Text(e.into_owned()),
1694 Event::Comment(e) => Event::Comment(e.into_owned()),
1695 Event::CData(e) => Event::CData(e.into_owned()),
1696 Event::Decl(e) => Event::Decl(e.into_owned()),
1697 Event::PI(e) => Event::PI(e.into_owned()),
1698 Event::DocType(e) => Event::DocType(e.into_owned()),
1699 Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1700 Event::Eof => Event::Eof,
1701 }
1702 }
1703
1704 /// Converts the event into a borrowed event.
1705 #[inline]
1706 pub fn borrow(&self) -> Event {
1707 match self {
1708 Event::Start(e) => Event::Start(e.borrow()),
1709 Event::End(e) => Event::End(e.borrow()),
1710 Event::Empty(e) => Event::Empty(e.borrow()),
1711 Event::Text(e) => Event::Text(e.borrow()),
1712 Event::Comment(e) => Event::Comment(e.borrow()),
1713 Event::CData(e) => Event::CData(e.borrow()),
1714 Event::Decl(e) => Event::Decl(e.borrow()),
1715 Event::PI(e) => Event::PI(e.borrow()),
1716 Event::DocType(e) => Event::DocType(e.borrow()),
1717 Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1718 Event::Eof => Event::Eof,
1719 }
1720 }
1721}
1722
1723impl<'a> Deref for Event<'a> {
1724 type Target = [u8];
1725
1726 fn deref(&self) -> &[u8] {
1727 match *self {
1728 Event::Start(ref e) | Event::Empty(ref e) => e,
1729 Event::End(ref e) => e,
1730 Event::Text(ref e) => e,
1731 Event::Decl(ref e) => e,
1732 Event::PI(ref e) => e,
1733 Event::CData(ref e) => e,
1734 Event::Comment(ref e) => e,
1735 Event::DocType(ref e) => e,
1736 Event::GeneralRef(ref e) => e,
1737 Event::Eof => &[],
1738 }
1739 }
1740}
1741
/// Identity conversion: lets generic code bounded by `AsRef<Event>` accept an
/// `Event` itself.
impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
1747
1748////////////////////////////////////////////////////////////////////////////////////////////////////
1749
/// Reinterprets a string-like value as its UTF-8 bytes without copying:
/// a borrowed string stays borrowed and an owned string keeps its allocation.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(Vec::from(owned)),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1757
/// Applies `trim` to the bytes of `value` while keeping its ownership kind.
///
/// A borrowed value is trimmed by re-slicing. An owned value is returned
/// untouched when `trim` keeps its length, otherwise the trimmed part is
/// copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(bytes) => return Cow::Borrowed(trim(bytes)),
        Cow::Owned(owned) => owned,
    };

    let trimmed = trim(&owned);
    if trimmed.len() == owned.len() {
        // Nothing was removed: reuse the existing allocation.
        return Cow::Owned(owned);
    }
    Cow::Owned(trimmed.to_vec())
}
1773
/// Unit tests for the length and name bookkeeping of `BytesStart`.
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }

    /// Renaming a tag must keep the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut b = BytesStart::new("test");
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
        assert_eq!(b.attributes_raw(), b"");
        b.push_attribute(("x", "a"));
        // `test` (4 bytes) + ` x="a"` (6 bytes)
        assert_eq!(b.len(), 10);
        assert_eq!(b.attributes_raw(), b" x=\"a\"");
        b.set_name(b"g");
        // `g` (1 byte) + ` x="a"` (6 bytes)
        assert_eq!(b.len(), 7);
        assert_eq!(b.name(), QName(b"g"));
    }

    /// Clearing attributes must leave only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut b = BytesStart::new("test");
        // The same attribute, with a quote inside its value, is pushed twice.
        b.push_attribute(("x", "y\"z"));
        b.push_attribute(("x", "y\"z"));
        b.clear_attributes();
        assert!(b.attributes().next().is_none());
        assert_eq!(b.len(), 4);
        assert_eq!(b.name(), QName(b"test"));
    }
}