quick_xml/events/
mod.rs

1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52    escape, minimal_escape, normalize_html_eols, normalize_xml_eols, parse_number, partial_escape,
53    EscapeError,
54};
55use crate::name::{LocalName, QName};
56use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
57use attributes::{AttrError, Attribute, Attributes};
58
59/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
60///
61/// The name can be accessed using the [`name`] or [`local_name`] methods.
62/// An iterator over the attributes is returned by the [`attributes`] method.
63///
64/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
65/// returns the content of this event between `<` and `>` or `/>`:
66///
67/// ```
68/// # use quick_xml::events::{BytesStart, Event};
69/// # use quick_xml::reader::Reader;
70/// # use pretty_assertions::assert_eq;
71/// // Remember, that \ at the end of string literal strips
72/// // all space characters to the first non-space character
73/// let mut reader = Reader::from_str("\
74///     <element a1 = 'val1' a2=\"val2\" />\
75///     <element a1 = 'val1' a2=\"val2\" >"
76/// );
77/// let content = "element a1 = 'val1' a2=\"val2\" ";
78/// let event = BytesStart::from_content(content, 7);
79///
80/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
81/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
82/// // deref coercion of &BytesStart to &[u8]
83/// assert_eq!(&event as &[u8], content.as_bytes());
84/// // AsRef<[u8]> for &T + deref coercion
85/// assert_eq!(event.as_ref(), content.as_bytes());
86/// ```
87///
88/// [`name`]: Self::name
89/// [`local_name`]: Self::local_name
90/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (name followed by attributes), before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
100
101impl<'a> BytesStart<'a> {
102    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
103    #[inline]
104    pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
105        BytesStart {
106            buf: Cow::Borrowed(content),
107            name_len,
108            decoder,
109        }
110    }
111
112    /// Creates a new `BytesStart` from the given name.
113    ///
114    /// # Warning
115    ///
116    /// `name` must be a valid name.
117    #[inline]
118    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
119        let buf = str_cow_to_bytes(name);
120        BytesStart {
121            name_len: buf.len(),
122            buf,
123            decoder: Decoder::utf8(),
124        }
125    }
126
127    /// Creates a new `BytesStart` from the given content (name + attributes).
128    ///
129    /// # Warning
130    ///
131    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
132    /// must be correctly-formed attributes. Neither are checked, it is possible
133    /// to generate invalid XML if `content` or `name_len` are incorrect.
134    #[inline]
135    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
136        BytesStart {
137            buf: str_cow_to_bytes(content),
138            name_len,
139            decoder: Decoder::utf8(),
140        }
141    }
142
143    /// Converts the event into an owned event.
144    pub fn into_owned(self) -> BytesStart<'static> {
145        BytesStart {
146            buf: Cow::Owned(self.buf.into_owned()),
147            name_len: self.name_len,
148            decoder: self.decoder,
149        }
150    }
151
152    /// Converts the event into an owned event without taking ownership of Event
153    pub fn to_owned(&self) -> BytesStart<'static> {
154        BytesStart {
155            buf: Cow::Owned(self.buf.clone().into_owned()),
156            name_len: self.name_len,
157            decoder: self.decoder,
158        }
159    }
160
161    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
162    ///
163    /// # Example
164    ///
165    /// ```
166    /// use quick_xml::events::{BytesStart, Event};
167    /// # use quick_xml::writer::Writer;
168    /// # use quick_xml::Error;
169    ///
170    /// struct SomeStruct<'a> {
171    ///     attrs: BytesStart<'a>,
172    ///     // ...
173    /// }
174    /// # impl<'a> SomeStruct<'a> {
175    /// # fn example(&self) -> Result<(), Error> {
176    /// # let mut writer = Writer::new(Vec::new());
177    ///
178    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
179    /// // ...
180    /// writer.write_event(Event::End(self.attrs.to_end()))?;
181    /// # Ok(())
182    /// # }}
183    /// ```
184    ///
185    /// [`to_end`]: Self::to_end
186    pub fn borrow(&self) -> BytesStart {
187        BytesStart {
188            buf: Cow::Borrowed(&self.buf),
189            name_len: self.name_len,
190            decoder: self.decoder,
191        }
192    }
193
194    /// Creates new paired close tag
195    #[inline]
196    pub fn to_end(&self) -> BytesEnd {
197        BytesEnd::from(self.name())
198    }
199
200    /// Get the decoder, used to decode bytes, read by the reader which produces
201    /// this event, to the strings.
202    ///
203    /// When event was created manually, encoding is UTF-8.
204    ///
205    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
206    /// defaults to UTF-8.
207    ///
208    /// [`encoding`]: ../index.html#encoding
209    #[inline]
210    pub const fn decoder(&self) -> Decoder {
211        self.decoder
212    }
213
214    /// Gets the undecoded raw tag name, as present in the input stream.
215    #[inline]
216    pub fn name(&self) -> QName {
217        QName(&self.buf[..self.name_len])
218    }
219
220    /// Gets the undecoded raw local tag name (excluding namespace) as present
221    /// in the input stream.
222    ///
223    /// All content up to and including the first `:` character is removed from the tag name.
224    #[inline]
225    pub fn local_name(&self) -> LocalName {
226        self.name().into()
227    }
228
229    /// Edit the name of the BytesStart in-place
230    ///
231    /// # Warning
232    ///
233    /// `name` must be a valid name.
234    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
235        let bytes = self.buf.to_mut();
236        bytes.splice(..self.name_len, name.iter().cloned());
237        self.name_len = name.len();
238        self
239    }
240}
241
242/// Attribute-related methods
243impl<'a> BytesStart<'a> {
244    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
245    ///
246    /// The yielded items must be convertible to [`Attribute`] using `Into`.
247    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
248    where
249        I: IntoIterator,
250        I::Item: Into<Attribute<'b>>,
251    {
252        self.extend_attributes(attributes);
253        self
254    }
255
256    /// Add additional attributes to this tag using an iterator.
257    ///
258    /// The yielded items must be convertible to [`Attribute`] using `Into`.
259    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
260    where
261        I: IntoIterator,
262        I::Item: Into<Attribute<'b>>,
263    {
264        for attr in attributes {
265            self.push_attribute(attr);
266        }
267        self
268    }
269
270    /// Adds an attribute to this element.
271    pub fn push_attribute<'b, A>(&mut self, attr: A)
272    where
273        A: Into<Attribute<'b>>,
274    {
275        self.buf.to_mut().push(b' ');
276        self.push_attr(attr.into());
277    }
278
279    /// Remove all attributes from the ByteStart
280    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
281        self.buf.to_mut().truncate(self.name_len);
282        self
283    }
284
285    /// Returns an iterator over the attributes of this tag.
286    pub fn attributes(&self) -> Attributes {
287        Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
288    }
289
290    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
291    pub fn html_attributes(&self) -> Attributes {
292        Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
293    }
294
295    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
296    /// including the whitespace after the tag name if there is any.
297    #[inline]
298    pub fn attributes_raw(&self) -> &[u8] {
299        &self.buf[self.name_len..]
300    }
301
302    /// Try to get an attribute
303    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
304        &'a self,
305        attr_name: N,
306    ) -> Result<Option<Attribute<'a>>, AttrError> {
307        for a in self.attributes().with_checks(false) {
308            let a = a?;
309            if a.key.as_ref() == attr_name.as_ref() {
310                return Ok(Some(a));
311            }
312        }
313        Ok(None)
314    }
315
316    /// Adds an attribute to this element.
317    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
318        let bytes = self.buf.to_mut();
319        bytes.extend_from_slice(attr.key.as_ref());
320        bytes.extend_from_slice(b"=\"");
321        // FIXME: need to escape attribute content
322        bytes.extend_from_slice(attr.value.as_ref());
323        bytes.push(b'"');
324    }
325
326    /// Adds new line in existing element
327    pub(crate) fn push_newline(&mut self) {
328        self.buf.to_mut().push(b'\n');
329    }
330
331    /// Adds indentation bytes in existing element
332    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
333        self.buf.to_mut().extend_from_slice(indent);
334    }
335}
336
337impl<'a> Debug for BytesStart<'a> {
338    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
339        write!(f, "BytesStart {{ buf: ")?;
340        write_cow_string(f, &self.buf)?;
341        write!(f, ", name_len: {} }}", self.name_len)
342    }
343}
344
345impl<'a> Deref for BytesStart<'a> {
346    type Target = [u8];
347
348    fn deref(&self) -> &[u8] {
349        &self.buf
350    }
351}
352
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Builds an event from an arbitrary name and an arbitrary list of
    /// `(key, value)` attribute pairs. Empty names and names with
    /// non-alphanumeric characters are rejected as `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let has_bad_char = name.chars().any(|c| !c.is_alphanumeric());
        if name.is_empty() || has_bad_char {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut event = Self::new(name);
        let attrs = Vec::<(&str, &str)>::arbitrary(u)?;
        event.extend_attributes(attrs);
        Ok(event)
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
369
370////////////////////////////////////////////////////////////////////////////////////////////////////
371
372/// Closing tag data (`Event::End`): `</name>`.
373///
374/// The name can be accessed using the [`name`] or [`local_name`] methods.
375///
376/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
377/// returns the content of this event between `</` and `>`.
378///
379/// Note, that inner text will not contain `>` character inside:
380///
381/// ```
382/// # use quick_xml::events::{BytesEnd, Event};
383/// # use quick_xml::reader::Reader;
384/// # use pretty_assertions::assert_eq;
385/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
386/// // Note, that this entire string considered as a .name()
387/// let content = "element a1 = 'val1' a2=\"val2\" ";
388/// let event = BytesEnd::new(content);
389///
390/// reader.config_mut().trim_markup_names_in_closing_tags = false;
391/// reader.config_mut().check_end_names = false;
392/// reader.read_event().unwrap(); // Skip `<element>`
393///
394/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
395/// assert_eq!(event.name().as_ref(), content.as_bytes());
396/// // deref coercion of &BytesEnd to &[u8]
397/// assert_eq!(&event as &[u8], content.as_bytes());
398/// // AsRef<[u8]> for &T + deref coercion
399/// assert_eq!(event.as_ref(), content.as_bytes());
400/// ```
401///
402/// [`name`]: Self::name
403/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Undecoded content of the closing tag; returned as is by `name()` and `deref()`
    name: Cow<'a, [u8]>,
}
408
409impl<'a> BytesEnd<'a> {
410    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
411    #[inline]
412    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
413        BytesEnd { name }
414    }
415
416    /// Creates a new `BytesEnd` borrowing a slice.
417    ///
418    /// # Warning
419    ///
420    /// `name` must be a valid name.
421    #[inline]
422    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
423        Self::wrap(str_cow_to_bytes(name))
424    }
425
426    /// Converts the event into an owned event.
427    pub fn into_owned(self) -> BytesEnd<'static> {
428        BytesEnd {
429            name: Cow::Owned(self.name.into_owned()),
430        }
431    }
432
433    /// Converts the event into a borrowed event.
434    #[inline]
435    pub fn borrow(&self) -> BytesEnd {
436        BytesEnd {
437            name: Cow::Borrowed(&self.name),
438        }
439    }
440
441    /// Gets the undecoded raw tag name, as present in the input stream.
442    #[inline]
443    pub fn name(&self) -> QName {
444        QName(&self.name)
445    }
446
447    /// Gets the undecoded raw local tag name (excluding namespace) as present
448    /// in the input stream.
449    ///
450    /// All content up to and including the first `:` character is removed from the tag name.
451    #[inline]
452    pub fn local_name(&self) -> LocalName {
453        self.name().into()
454    }
455}
456
457impl<'a> Debug for BytesEnd<'a> {
458    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
459        write!(f, "BytesEnd {{ name: ")?;
460        write_cow_string(f, &self.name)?;
461        write!(f, " }}")
462    }
463}
464
465impl<'a> Deref for BytesEnd<'a> {
466    type Target = [u8];
467
468    fn deref(&self) -> &[u8] {
469        &self.name
470    }
471}
472
473impl<'a> From<QName<'a>> for BytesEnd<'a> {
474    #[inline]
475    fn from(name: QName<'a>) -> Self {
476        Self::wrap(name.into_inner().into())
477    }
478}
479
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Builds an event whose name is an arbitrary string; the name is not validated.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
489
490////////////////////////////////////////////////////////////////////////////////////////////////////
491
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is stored in escaped form.
494///
495/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
496/// returns the content of this event. In case of comment this is everything
497/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
498/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
499/// (i.e. in case of DTD the first character is never space):
500///
501/// ```
502/// # use quick_xml::events::{BytesText, Event};
503/// # use quick_xml::reader::Reader;
504/// # use pretty_assertions::assert_eq;
505/// // Remember, that \ at the end of string literal strips
506/// // all space characters to the first non-space character
507/// let mut reader = Reader::from_str("\
508///     <!DOCTYPE comment or text >\
509///     comment or text \
510///     <!--comment or text -->"
511/// );
512/// let content = "comment or text ";
513/// let event = BytesText::new(content);
514///
515/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
516/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
517/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
518/// // deref coercion of &BytesText to &[u8]
519/// assert_eq!(&event as &[u8], content.as_bytes());
520/// // AsRef<[u8]> for &T + deref coercion
521/// assert_eq!(event.as_ref(), content.as_bytes());
522/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
532
533impl<'a> BytesText<'a> {
534    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
535    #[inline]
536    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
537        Self {
538            content: content.into(),
539            decoder,
540        }
541    }
542
543    /// Creates a new `BytesText` from an escaped string.
544    #[inline]
545    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
546        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
547    }
548
549    /// Creates a new `BytesText` from a string. The string is expected not to
550    /// be escaped.
551    #[inline]
552    pub fn new(content: &'a str) -> Self {
553        Self::from_escaped(escape(content))
554    }
555
556    /// Ensures that all data is owned to extend the object's lifetime if
557    /// necessary.
558    #[inline]
559    pub fn into_owned(self) -> BytesText<'static> {
560        BytesText {
561            content: self.content.into_owned().into(),
562            decoder: self.decoder,
563        }
564    }
565
566    /// Extracts the inner `Cow` from the `BytesText` event container.
567    #[inline]
568    pub fn into_inner(self) -> Cow<'a, [u8]> {
569        self.content
570    }
571
572    /// Converts the event into a borrowed event.
573    #[inline]
574    pub fn borrow(&self) -> BytesText {
575        BytesText {
576            content: Cow::Borrowed(&self.content),
577            decoder: self.decoder,
578        }
579    }
580
581    /// Decodes the content of the event.
582    ///
583    /// This will allocate if the value contains any escape sequences or in
584    /// non-UTF-8 encoding.
585    ///
586    /// This method does not normalizes end-of-line characters as required by [specification].
587    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
588    ///
589    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
590    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
591        self.decoder.decode_cow(&self.content)
592    }
593
594    /// Decodes the content of the XML event.
595    ///
596    /// When this event produced by the reader, it uses the encoding information
597    /// associated with that reader to interpret the raw bytes contained within
598    /// this text event.
599    ///
600    /// This will allocate if the value contains any escape sequences or in non-UTF-8
601    /// encoding, or EOL normalization is required.
602    ///
603    /// Note, that this method should be used only if event represents XML content,
604    /// because rules for normalizing EOLs for [XML] and [HTML] differs.
605    ///
606    /// To get HTML content use [`html_content()`](Self::html_content).
607    ///
608    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
609    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
610    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
611        self.decoder.content(&self.content, normalize_xml_eols)
612    }
613
614    /// Decodes the content of the HTML event.
615    ///
616    /// When this event produced by the reader, it uses the encoding information
617    /// associated with that reader to interpret the raw bytes contained within
618    /// this text event.
619    ///
620    /// This will allocate if the value contains any escape sequences or in non-UTF-8
621    /// encoding, or EOL normalization is required.
622    ///
623    /// Note, that this method should be used only if event represents HTML content,
624    /// because rules for normalizing EOLs for [XML] and [HTML] differs.
625    ///
626    /// To get XML content use [`xml_content()`](Self::xml_content).
627    ///
628    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
629    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
630    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
631        self.decoder.content(&self.content, normalize_html_eols)
632    }
633
634    /// Removes leading XML whitespace bytes from text content.
635    ///
636    /// Returns `true` if content is empty after that
637    pub fn inplace_trim_start(&mut self) -> bool {
638        self.content = trim_cow(
639            replace(&mut self.content, Cow::Borrowed(b"")),
640            trim_xml_start,
641        );
642        self.content.is_empty()
643    }
644
645    /// Removes trailing XML whitespace bytes from text content.
646    ///
647    /// Returns `true` if content is empty after that
648    pub fn inplace_trim_end(&mut self) -> bool {
649        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
650        self.content.is_empty()
651    }
652}
653
654impl<'a> Debug for BytesText<'a> {
655    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
656        write!(f, "BytesText {{ content: ")?;
657        write_cow_string(f, &self.content)?;
658        write!(f, " }}")
659    }
660}
661
662impl<'a> Deref for BytesText<'a> {
663    type Target = [u8];
664
665    fn deref(&self) -> &[u8] {
666        &self.content
667    }
668}
669
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Builds a text event from an arbitrary string. Strings with
    /// non-alphanumeric characters are rejected as `IncorrectFormat`;
    /// an empty string is accepted.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().any(|c| !c.is_alphanumeric()) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(text))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
684
685////////////////////////////////////////////////////////////////////////////////////////////////////
686
687/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
688/// [convert](Self::escape) it to [`BytesText`].
689///
690/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
691/// returns the content of this event between `<![CDATA[` and `]]>`.
692///
693/// Note, that inner text will not contain `]]>` sequence inside:
694///
695/// ```
696/// # use quick_xml::events::{BytesCData, Event};
697/// # use quick_xml::reader::Reader;
698/// # use pretty_assertions::assert_eq;
699/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
700/// let content = " CDATA section ";
701/// let event = BytesCData::new(content);
702///
703/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
704/// // deref coercion of &BytesCData to &[u8]
705/// assert_eq!(&event as &[u8], content.as_bytes());
706/// // AsRef<[u8]> for &T + deref coercion
707/// assert_eq!(event.as_ref(), content.as_bytes());
708/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Unescaped content of the CDATA section (what is between `<![CDATA[` and `]]>`)
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
715
716impl<'a> BytesCData<'a> {
717    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
718    #[inline]
719    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
720        Self {
721            content: content.into(),
722            decoder,
723        }
724    }
725
726    /// Creates a new `BytesCData` from a string.
727    ///
728    /// # Warning
729    ///
730    /// `content` must not contain the `]]>` sequence. You can use
731    /// [`BytesCData::escaped`] to escape the content instead.
732    #[inline]
733    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
734        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
735    }
736
737    /// Creates an iterator of `BytesCData` from a string.
738    ///
739    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
740    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
741    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
742    /// for each of those sections.
743    ///
744    /// # Examples
745    ///
746    /// ```
747    /// # use quick_xml::events::BytesCData;
748    /// # use pretty_assertions::assert_eq;
749    /// let content = "";
750    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
751    /// assert_eq!(cdata, &[BytesCData::new("")]);
752    ///
753    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
754    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
755    /// assert_eq!(cdata, &[
756    ///     BytesCData::new("Certain tokens like ]]"),
757    ///     BytesCData::new("> can be difficult and <invalid>"),
758    /// ]);
759    ///
760    /// let content = "foo]]>bar]]>baz]]>quux";
761    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
762    /// assert_eq!(cdata, &[
763    ///     BytesCData::new("foo]]"),
764    ///     BytesCData::new(">bar]]"),
765    ///     BytesCData::new(">baz]]"),
766    ///     BytesCData::new(">quux"),
767    /// ]);
768    /// ```
769    #[inline]
770    pub fn escaped(content: &'a str) -> CDataIterator<'a> {
771        CDataIterator {
772            unprocessed: content.as_bytes(),
773            finished: false,
774        }
775    }
776
777    /// Ensures that all data is owned to extend the object's lifetime if
778    /// necessary.
779    #[inline]
780    pub fn into_owned(self) -> BytesCData<'static> {
781        BytesCData {
782            content: self.content.into_owned().into(),
783            decoder: self.decoder,
784        }
785    }
786
787    /// Extracts the inner `Cow` from the `BytesCData` event container.
788    #[inline]
789    pub fn into_inner(self) -> Cow<'a, [u8]> {
790        self.content
791    }
792
793    /// Converts the event into a borrowed event.
794    #[inline]
795    pub fn borrow(&self) -> BytesCData {
796        BytesCData {
797            content: Cow::Borrowed(&self.content),
798            decoder: self.decoder,
799        }
800    }
801
802    /// Converts this CDATA content to an escaped version, that can be written
803    /// as an usual text in XML.
804    ///
805    /// This function performs following replacements:
806    ///
807    /// | Character | Replacement
808    /// |-----------|------------
809    /// | `<`       | `&lt;`
810    /// | `>`       | `&gt;`
811    /// | `&`       | `&amp;`
812    /// | `'`       | `&apos;`
813    /// | `"`       | `&quot;`
814    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
815        let decoded = self.decode()?;
816        Ok(BytesText::wrap(
817            match escape(decoded) {
818                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
819                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
820            },
821            Decoder::utf8(),
822        ))
823    }
824
825    /// Converts this CDATA content to an escaped version, that can be written
826    /// as an usual text in XML.
827    ///
828    /// In XML text content, it is allowed (though not recommended) to leave
829    /// the quote special characters `"` and `'` unescaped.
830    ///
831    /// This function performs following replacements:
832    ///
833    /// | Character | Replacement
834    /// |-----------|------------
835    /// | `<`       | `&lt;`
836    /// | `>`       | `&gt;`
837    /// | `&`       | `&amp;`
838    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
839        let decoded = self.decode()?;
840        Ok(BytesText::wrap(
841            match partial_escape(decoded) {
842                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
843                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
844            },
845            Decoder::utf8(),
846        ))
847    }
848
849    /// Converts this CDATA content to an escaped version, that can be written
850    /// as an usual text in XML. This method escapes only those characters that
851    /// must be escaped according to the [specification].
852    ///
853    /// This function performs following replacements:
854    ///
855    /// | Character | Replacement
856    /// |-----------|------------
857    /// | `<`       | `&lt;`
858    /// | `&`       | `&amp;`
859    ///
860    /// [specification]: https://www.w3.org/TR/xml11/#syntax
861    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
862        let decoded = self.decode()?;
863        Ok(BytesText::wrap(
864            match minimal_escape(decoded) {
865                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
866                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
867            },
868            Decoder::utf8(),
869        ))
870    }
871
872    /// Decodes the raw input byte content of the CDATA section into a string,
873    /// without performing XML entity escaping.
874    ///
875    /// When this event produced by the XML reader, it uses the encoding information
876    /// associated with that reader to interpret the raw bytes contained within this
877    /// CDATA event.
878    ///
879    /// This method does not normalizes end-of-line characters as required by [specification].
880    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
881    ///
882    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
883    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
884        self.decoder.decode_cow(&self.content)
885    }
886
887    /// Decodes the raw input byte content of the CDATA section of the XML event
888    /// into a string.
889    ///
890    /// When this event produced by the reader, it uses the encoding information
891    /// associated with that reader to interpret the raw bytes contained within
892    /// this CDATA event.
893    ///
894    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
895    /// is required.
896    ///
897    /// Note, that this method should be used only if event represents XML content,
898    /// because rules for normalizing EOLs for [XML] and [HTML] differs.
899    ///
900    /// To get HTML content use [`html_content()`](Self::html_content).
901    ///
902    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
903    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
904    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
905        self.decoder.content(&self.content, normalize_xml_eols)
906    }
907
908    /// Decodes the raw input byte content of the CDATA section of the HTML event
909    /// into a string.
910    ///
911    /// When this event produced by the reader, it uses the encoding information
912    /// associated with that reader to interpret the raw bytes contained within
913    /// this CDATA event.
914    ///
915    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
916    /// is required.
917    ///
918    /// Note, that this method should be used only if event represents HTML content,
919    /// because rules for normalizing EOLs for [XML] and [HTML] differs.
920    ///
921    /// To get XML content use [`xml_content()`](Self::xml_content).
922    ///
923    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
924    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
925    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
926        self.decoder.content(&self.content, normalize_html_eols)
927    }
928}
929
930impl<'a> Debug for BytesCData<'a> {
931    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
932        write!(f, "BytesCData {{ content: ")?;
933        write_cow_string(f, &self.content)?;
934        write!(f, " }}")
935    }
936}
937
938impl<'a> Deref for BytesCData<'a> {
939    type Target = [u8];
940
941    fn deref(&self) -> &[u8] {
942        &self.content
943    }
944}
945
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds an event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// The only input consumed is a `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
955
/// Iterator over `CDATA` sections in a string.
///
/// Each item is a [`BytesCData`] chunk of the original data.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the final chunk (whatever was left in `unprocessed`) has been
    /// emitted; while it is set, `next()` returns `None`.
    finished: bool,
}
966
967impl<'a> Debug for CDataIterator<'a> {
968    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
969        f.debug_struct("CDataIterator")
970            .field("unprocessed", &Bytes(self.unprocessed))
971            .field("finished", &self.finished)
972            .finish()
973    }
974}
975
976impl<'a> Iterator for CDataIterator<'a> {
977    type Item = BytesCData<'a>;
978
979    fn next(&mut self) -> Option<BytesCData<'a>> {
980        if self.finished {
981            return None;
982        }
983
984        for gt in memchr::memchr_iter(b'>', self.unprocessed) {
985            if self.unprocessed[..gt].ends_with(b"]]") {
986                let (slice, rest) = self.unprocessed.split_at(gt);
987                self.unprocessed = rest;
988                return Some(BytesCData::wrap(slice, Decoder::utf8()));
989            }
990        }
991
992        self.finished = true;
993        Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
994    }
995}
996
// `next()` returns `None` whenever `finished` is set and never clears that flag
// itself, so once the iterator is exhausted it keeps returning `None`.
impl FusedIterator for CDataIterator<'_> {}
998
999////////////////////////////////////////////////////////////////////////////////////////////////////
1000
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text never contains the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// The instruction is stored as a `BytesStart`: its "name" part is the PI
    /// target and its "attributes" part is the rest of the instruction.
    content: BytesStart<'a>,
}
1028
1029impl<'a> BytesPI<'a> {
1030    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
1031    #[inline]
1032    pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
1033        Self {
1034            content: BytesStart::wrap(content, target_len, decoder),
1035        }
1036    }
1037
1038    /// Creates a new `BytesPI` from a string.
1039    ///
1040    /// # Warning
1041    ///
1042    /// `content` must not contain the `?>` sequence.
1043    #[inline]
1044    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
1045        let buf = str_cow_to_bytes(content);
1046        let name_len = name_len(&buf);
1047        Self {
1048            content: BytesStart {
1049                buf,
1050                name_len,
1051                decoder: Decoder::utf8(),
1052            },
1053        }
1054    }
1055
1056    /// Ensures that all data is owned to extend the object's lifetime if
1057    /// necessary.
1058    #[inline]
1059    pub fn into_owned(self) -> BytesPI<'static> {
1060        BytesPI {
1061            content: self.content.into_owned().into(),
1062        }
1063    }
1064
1065    /// Extracts the inner `Cow` from the `BytesPI` event container.
1066    #[inline]
1067    pub fn into_inner(self) -> Cow<'a, [u8]> {
1068        self.content.buf
1069    }
1070
1071    /// Converts the event into a borrowed event.
1072    #[inline]
1073    pub fn borrow(&self) -> BytesPI {
1074        BytesPI {
1075            content: self.content.borrow(),
1076        }
1077    }
1078
1079    /// A target used to identify the application to which the instruction is directed.
1080    ///
1081    /// # Example
1082    ///
1083    /// ```
1084    /// # use pretty_assertions::assert_eq;
1085    /// use quick_xml::events::BytesPI;
1086    ///
1087    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1088    /// assert_eq!(instruction.target(), b"xml-stylesheet");
1089    /// ```
1090    #[inline]
1091    pub fn target(&self) -> &[u8] {
1092        self.content.name().0
1093    }
1094
1095    /// Content of the processing instruction. Contains everything between target
1096    /// name and the end of the instruction. A direct consequence is that the first
1097    /// character is always a space character.
1098    ///
1099    /// # Example
1100    ///
1101    /// ```
1102    /// # use pretty_assertions::assert_eq;
1103    /// use quick_xml::events::BytesPI;
1104    ///
1105    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1106    /// assert_eq!(instruction.content(), br#" href="style.css""#);
1107    /// ```
1108    #[inline]
1109    pub fn content(&self) -> &[u8] {
1110        self.content.attributes_raw()
1111    }
1112
1113    /// A view of the processing instructions' content as a list of key-value pairs.
1114    ///
1115    /// Key-value pairs are used in some processing instructions, for example in
1116    /// `<?xml-stylesheet?>`.
1117    ///
1118    /// Returned iterator does not validate attribute values as may required by
1119    /// target's rules. For example, it doesn't check that substring `?>` is not
1120    /// present in the attribute value. That shouldn't be the problem when event
1121    /// is produced by the reader, because reader detects end of processing instruction
1122    /// by the first `?>` sequence, as required by the specification, and therefore
1123    /// this sequence cannot appear inside it.
1124    ///
1125    /// # Example
1126    ///
1127    /// ```
1128    /// # use pretty_assertions::assert_eq;
1129    /// use std::borrow::Cow;
1130    /// use quick_xml::events::attributes::Attribute;
1131    /// use quick_xml::events::BytesPI;
1132    /// use quick_xml::name::QName;
1133    ///
1134    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1135    /// for attr in instruction.attributes() {
1136    ///     assert_eq!(attr, Ok(Attribute {
1137    ///         key: QName(b"href"),
1138    ///         value: Cow::Borrowed(b"style.css"),
1139    ///     }));
1140    /// }
1141    /// ```
1142    #[inline]
1143    pub fn attributes(&self) -> Attributes {
1144        self.content.attributes()
1145    }
1146}
1147
1148impl<'a> Debug for BytesPI<'a> {
1149    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1150        write!(f, "BytesPI {{ content: ")?;
1151        write_cow_string(f, &self.content.buf)?;
1152        write!(f, " }}")
1153    }
1154}
1155
1156impl<'a> Deref for BytesPI<'a> {
1157    type Target = [u8];
1158
1159    fn deref(&self) -> &[u8] {
1160        &self.content
1161    }
1162}
1163
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds an event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(text))
    }

    /// The only input consumed is a `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1173
1174////////////////////////////////////////////////////////////////////////////////////////////////////
1175
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text never contains the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
/// let content = "xml version = '1.0' ";
/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
///
/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
/// // deref coercion of &BytesDecl to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// The declaration is stored as a `BytesStart`, whose attribute parsing is
    /// reused to read `version`, `encoding` and `standalone`.
    content: BytesStart<'a>,
}
1203
1204impl<'a> BytesDecl<'a> {
1205    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1206    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1207    /// attribute.
1208    ///
1209    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1210    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1211    /// the double quote character is not allowed in any of the attribute values.
1212    pub fn new(
1213        version: &str,
1214        encoding: Option<&str>,
1215        standalone: Option<&str>,
1216    ) -> BytesDecl<'static> {
1217        // Compute length of the buffer based on supplied attributes
1218        // ' encoding=""'   => 12
1219        let encoding_attr_len = if let Some(xs) = encoding {
1220            12 + xs.len()
1221        } else {
1222            0
1223        };
1224        // ' standalone=""' => 14
1225        let standalone_attr_len = if let Some(xs) = standalone {
1226            14 + xs.len()
1227        } else {
1228            0
1229        };
1230        // 'xml version=""' => 14
1231        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1232
1233        buf.push_str("xml version=\"");
1234        buf.push_str(version);
1235
1236        if let Some(encoding_val) = encoding {
1237            buf.push_str("\" encoding=\"");
1238            buf.push_str(encoding_val);
1239        }
1240
1241        if let Some(standalone_val) = standalone {
1242            buf.push_str("\" standalone=\"");
1243            buf.push_str(standalone_val);
1244        }
1245        buf.push('"');
1246
1247        BytesDecl {
1248            content: BytesStart::from_content(buf, 3),
1249        }
1250    }
1251
1252    /// Creates a `BytesDecl` from a `BytesStart`
1253    pub const fn from_start(start: BytesStart<'a>) -> Self {
1254        Self { content: start }
1255    }
1256
1257    /// Gets xml version, excluding quotes (`'` or `"`).
1258    ///
1259    /// According to the [grammar], the version *must* be the first thing in the declaration.
1260    /// This method tries to extract the first thing in the declaration and return it.
1261    /// In case of multiple attributes value of the first one is returned.
1262    ///
1263    /// If version is missed in the declaration, or the first thing is not a version,
1264    /// [`IllFormedError::MissingDeclVersion`] will be returned.
1265    ///
1266    /// # Examples
1267    ///
1268    /// ```
1269    /// use quick_xml::errors::{Error, IllFormedError};
1270    /// use quick_xml::events::{BytesDecl, BytesStart};
1271    ///
1272    /// // <?xml version='1.1'?>
1273    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1274    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1275    ///
1276    /// // <?xml version='1.0' version='1.1'?>
1277    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1278    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1279    ///
1280    /// // <?xml encoding='utf-8'?>
1281    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1282    /// match decl.version() {
1283    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1284    ///     _ => assert!(false),
1285    /// }
1286    ///
1287    /// // <?xml encoding='utf-8' version='1.1'?>
1288    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1289    /// match decl.version() {
1290    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1291    ///     _ => assert!(false),
1292    /// }
1293    ///
1294    /// // <?xml?>
1295    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1296    /// match decl.version() {
1297    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1298    ///     _ => assert!(false),
1299    /// }
1300    /// ```
1301    ///
1302    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1303    pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1304        // The version *must* be the first thing in the declaration.
1305        match self.content.attributes().with_checks(false).next() {
1306            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1307            // first attribute was not "version"
1308            Some(Ok(a)) => {
1309                let found = from_utf8(a.key.as_ref())
1310                    .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1311                    .to_string();
1312                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1313                    found,
1314                ))))
1315            }
1316            // error parsing attributes
1317            Some(Err(e)) => Err(e.into()),
1318            // no attributes
1319            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1320        }
1321    }
1322
1323    /// Gets xml encoding, excluding quotes (`'` or `"`).
1324    ///
1325    /// Although according to the [grammar] encoding must appear before `"standalone"`
1326    /// and after `"version"`, this method does not check that. The first occurrence
1327    /// of the attribute will be returned even if there are several. Also, method does
1328    /// not restrict symbols that can forming the encoding, so the returned encoding
1329    /// name may not correspond to the grammar.
1330    ///
1331    /// # Examples
1332    ///
1333    /// ```
1334    /// use std::borrow::Cow;
1335    /// use quick_xml::Error;
1336    /// use quick_xml::events::{BytesDecl, BytesStart};
1337    ///
1338    /// // <?xml version='1.1'?>
1339    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1340    /// assert!(decl.encoding().is_none());
1341    ///
1342    /// // <?xml encoding='utf-8'?>
1343    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1344    /// match decl.encoding() {
1345    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1346    ///     _ => assert!(false),
1347    /// }
1348    ///
1349    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1350    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1351    /// match decl.encoding() {
1352    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1353    ///     _ => assert!(false),
1354    /// }
1355    /// ```
1356    ///
1357    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1358    pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1359        self.content
1360            .try_get_attribute("encoding")
1361            .map(|a| a.map(|a| a.value))
1362            .transpose()
1363    }
1364
1365    /// Gets xml standalone, excluding quotes (`'` or `"`).
1366    ///
1367    /// Although according to the [grammar] standalone flag must appear after `"version"`
1368    /// and `"encoding"`, this method does not check that. The first occurrence of the
1369    /// attribute will be returned even if there are several. Also, method does not
1370    /// restrict symbols that can forming the value, so the returned flag name may not
1371    /// correspond to the grammar.
1372    ///
1373    /// # Examples
1374    ///
1375    /// ```
1376    /// use std::borrow::Cow;
1377    /// use quick_xml::Error;
1378    /// use quick_xml::events::{BytesDecl, BytesStart};
1379    ///
1380    /// // <?xml version='1.1'?>
1381    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1382    /// assert!(decl.standalone().is_none());
1383    ///
1384    /// // <?xml standalone='yes'?>
1385    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1386    /// match decl.standalone() {
1387    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1388    ///     _ => assert!(false),
1389    /// }
1390    ///
1391    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1392    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1393    /// match decl.standalone() {
1394    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1395    ///     _ => assert!(false),
1396    /// }
1397    /// ```
1398    ///
1399    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1400    pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1401        self.content
1402            .try_get_attribute("standalone")
1403            .map(|a| a.map(|a| a.value))
1404            .transpose()
1405    }
1406
1407    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1408    /// algorithm.
1409    ///
1410    /// If encoding in not known, or `encoding` key was not found, returns `None`.
1411    /// In case of duplicated `encoding` key, encoding, corresponding to the first
1412    /// one, is returned.
1413    #[cfg(feature = "encoding")]
1414    pub fn encoder(&self) -> Option<&'static Encoding> {
1415        self.encoding()
1416            .and_then(|e| e.ok())
1417            .and_then(|e| Encoding::for_label(&e))
1418    }
1419
1420    /// Converts the event into an owned event.
1421    pub fn into_owned(self) -> BytesDecl<'static> {
1422        BytesDecl {
1423            content: self.content.into_owned(),
1424        }
1425    }
1426
1427    /// Converts the event into a borrowed event.
1428    #[inline]
1429    pub fn borrow(&self) -> BytesDecl {
1430        BytesDecl {
1431            content: self.content.borrow(),
1432        }
1433    }
1434}
1435
1436impl<'a> Deref for BytesDecl<'a> {
1437    type Target = [u8];
1438
1439    fn deref(&self) -> &[u8] {
1440        &self.content
1441    }
1442}
1443
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string plus optional
    /// arbitrary encoding and standalone values, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        let encoding = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        let standalone = <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reports the hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1458
1459////////////////////////////////////////////////////////////////////////////////////////////////////
1460
/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `&` and `;`:
///
/// ```
/// # use quick_xml::events::{BytesRef, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"&entity;"#);
/// let content = "entity";
/// let event = BytesRef::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
/// // deref coercion of &BytesRef to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Bytes of the reference between `&` and `;`. A leading `#` marks
    /// a character reference.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1486
1487impl<'a> BytesRef<'a> {
1488    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1489    #[inline]
1490    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1491        Self {
1492            content: Cow::Borrowed(content),
1493            decoder,
1494        }
1495    }
1496
1497    /// Creates a new `BytesRef` borrowing a slice.
1498    ///
1499    /// # Warning
1500    ///
1501    /// `name` must be a valid name.
1502    #[inline]
1503    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1504        Self {
1505            content: str_cow_to_bytes(name),
1506            decoder: Decoder::utf8(),
1507        }
1508    }
1509
1510    /// Converts the event into an owned event.
1511    pub fn into_owned(self) -> BytesRef<'static> {
1512        BytesRef {
1513            content: Cow::Owned(self.content.into_owned()),
1514            decoder: self.decoder,
1515        }
1516    }
1517
1518    /// Extracts the inner `Cow` from the `BytesRef` event container.
1519    #[inline]
1520    pub fn into_inner(self) -> Cow<'a, [u8]> {
1521        self.content
1522    }
1523
1524    /// Converts the event into a borrowed event.
1525    #[inline]
1526    pub fn borrow(&self) -> BytesRef {
1527        BytesRef {
1528            content: Cow::Borrowed(&self.content),
1529            decoder: self.decoder,
1530        }
1531    }
1532
1533    /// Decodes the content of the event.
1534    ///
1535    /// This will allocate if the value contains any escape sequences or in
1536    /// non-UTF-8 encoding.
1537    ///
1538    /// This method does not normalizes end-of-line characters as required by [specification].
1539    /// Usually you need [`xml_content()`](Self::xml_content) instead of this method.
1540    ///
1541    /// [specification]: https://www.w3.org/TR/xml11/#sec-line-ends
1542    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1543        self.decoder.decode_cow(&self.content)
1544    }
1545
1546    /// Decodes the content of the XML event.
1547    ///
1548    /// When this event produced by the reader, it uses the encoding information
1549    /// associated with that reader to interpret the raw bytes contained within
1550    /// this general reference event.
1551    ///
1552    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1553    /// is required.
1554    ///
1555    /// Note, that this method should be used only if event represents XML content,
1556    /// because rules for normalizing EOLs for [XML] and [HTML] differs.
1557    ///
1558    /// To get HTML content use [`html_content()`](Self::html_content).
1559    ///
1560    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
1561    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1562    pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1563        self.decoder.content(&self.content, normalize_xml_eols)
1564    }
1565
1566    /// Decodes the content of the HTML event.
1567    ///
1568    /// When this event produced by the reader, it uses the encoding information
1569    /// associated with that reader to interpret the raw bytes contained within
1570    /// this general reference event.
1571    ///
1572    /// This will allocate if the value in non-UTF-8 encoding, or EOL normalization
1573    /// is required.
1574    ///
1575    /// Note, that this method should be used only if event represents HTML content,
1576    /// because rules for normalizing EOLs for [XML] and [HTML] differs.
1577    ///
1578    /// To get XML content use [`xml_content()`](Self::xml_content).
1579    ///
1580    /// [XML]: https://www.w3.org/TR/xml11/#sec-line-ends
1581    /// [HTML]: https://html.spec.whatwg.org/#normalize-newlines
1582    pub fn html_content(&self) -> Result<Cow<'a, str>, EncodingError> {
1583        self.decoder.content(&self.content, normalize_html_eols)
1584    }
1585
1586    /// Returns `true` if the specified reference represents the character reference
1587    /// (`&#<number>;`).
1588    ///
1589    /// ```
1590    /// # use quick_xml::events::BytesRef;
1591    /// # use pretty_assertions::assert_eq;
1592    /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1593    /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1594    /// assert_eq!(BytesRef::new("lt"  ).is_char_ref(), false);
1595    /// ```
1596    pub fn is_char_ref(&self) -> bool {
1597        matches!(self.content.first(), Some(b'#'))
1598    }
1599
1600    /// If this reference represents character reference, then resolves it and
1601    /// returns the character, otherwise returns `None`.
1602    ///
1603    /// This method does not check if character is allowed for XML, in other words,
1604    /// well-formedness constraint [WFC: Legal Char] is not enforced.
1605    /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1606    ///
1607    /// ```
1608    /// # use quick_xml::events::BytesRef;
1609    /// # use pretty_assertions::assert_eq;
1610    /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1611    /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1612    /// assert_eq!(BytesRef::new("lt"  ).resolve_char_ref().unwrap(), None);
1613    /// ```
1614    ///
1615    /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1616    pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1617        if let Some(num) = self.decode()?.strip_prefix('#') {
1618            let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1619            return Ok(Some(ch));
1620        }
1621        Ok(None)
1622    }
1623}
1624
1625impl<'a> Debug for BytesRef<'a> {
1626    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1627        write!(f, "BytesRef {{ content: ")?;
1628        write_cow_string(f, &self.content)?;
1629        write!(f, " }}")
1630    }
1631}
1632
1633impl<'a> Deref for BytesRef<'a> {
1634    type Target = [u8];
1635
1636    fn deref(&self) -> &[u8] {
1637        &self.content
1638    }
1639}
1640
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds an event from an arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// The only input consumed is a `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        let hint = <&str as arbitrary::Arbitrary>::size_hint(depth);
        hint
    }
}
1651
1652////////////////////////////////////////////////////////////////////////////////////////////////////
1653
/// Event emitted by [`Reader::read_event_into`].
///
/// Each variant wraps the event type that holds the data of the corresponding
/// part of the document; [`Event::Eof`] carries no data.
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document.
    Eof,
}
1684
1685impl<'a> Event<'a> {
1686    /// Converts the event to an owned version, untied to the lifetime of
1687    /// buffer used when reading but incurring a new, separate allocation.
1688    pub fn into_owned(self) -> Event<'static> {
1689        match self {
1690            Event::Start(e) => Event::Start(e.into_owned()),
1691            Event::End(e) => Event::End(e.into_owned()),
1692            Event::Empty(e) => Event::Empty(e.into_owned()),
1693            Event::Text(e) => Event::Text(e.into_owned()),
1694            Event::Comment(e) => Event::Comment(e.into_owned()),
1695            Event::CData(e) => Event::CData(e.into_owned()),
1696            Event::Decl(e) => Event::Decl(e.into_owned()),
1697            Event::PI(e) => Event::PI(e.into_owned()),
1698            Event::DocType(e) => Event::DocType(e.into_owned()),
1699            Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1700            Event::Eof => Event::Eof,
1701        }
1702    }
1703
1704    /// Converts the event into a borrowed event.
1705    #[inline]
1706    pub fn borrow(&self) -> Event {
1707        match self {
1708            Event::Start(e) => Event::Start(e.borrow()),
1709            Event::End(e) => Event::End(e.borrow()),
1710            Event::Empty(e) => Event::Empty(e.borrow()),
1711            Event::Text(e) => Event::Text(e.borrow()),
1712            Event::Comment(e) => Event::Comment(e.borrow()),
1713            Event::CData(e) => Event::CData(e.borrow()),
1714            Event::Decl(e) => Event::Decl(e.borrow()),
1715            Event::PI(e) => Event::PI(e.borrow()),
1716            Event::DocType(e) => Event::DocType(e.borrow()),
1717            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1718            Event::Eof => Event::Eof,
1719        }
1720    }
1721}
1722
1723impl<'a> Deref for Event<'a> {
1724    type Target = [u8];
1725
1726    fn deref(&self) -> &[u8] {
1727        match *self {
1728            Event::Start(ref e) | Event::Empty(ref e) => e,
1729            Event::End(ref e) => e,
1730            Event::Text(ref e) => e,
1731            Event::Decl(ref e) => e,
1732            Event::PI(ref e) => e,
1733            Event::CData(ref e) => e,
1734            Event::Comment(ref e) => e,
1735            Event::DocType(ref e) => e,
1736            Event::GeneralRef(ref e) => e,
1737            Event::Eof => &[],
1738        }
1739    }
1740}
1741
1742impl<'a> AsRef<Event<'a>> for Event<'a> {
1743    fn as_ref(&self) -> &Event<'a> {
1744        self
1745    }
1746}
1747
1748////////////////////////////////////////////////////////////////////////////////////////////////////
1749
/// Reinterprets string content as bytes without copying it.
///
/// A borrowed string stays borrowed (as its UTF-8 bytes) and an owned string
/// hands its buffer over to the resulting owned byte vector.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::from(string.into_bytes()),
        Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
    }
}
1757
/// Applies `trim` to the bytes in `value`, keeping the kind of `Cow`.
///
/// - Borrowed input: the result borrows the sub-slice returned by `trim`.
/// - Owned input: if `trim` returned a slice of the same length, the original
///   vector is returned as is; otherwise the returned slice is copied into a
///   new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    match value {
        Cow::Owned(buffer) => {
            let kept = trim(&buffer);
            if kept.len() == buffer.len() {
                // Same length: reuse the existing allocation
                Cow::Owned(buffer)
            } else {
                Cow::Owned(kept.to_vec())
            }
        }
        Cow::Borrowed(slice) => Cow::Borrowed(trim(slice)),
    }
}
1773
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A new start tag contains only its name.
    #[test]
    fn bytestart_create() {
        let start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut start = BytesStart::new("test");
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
        assert_eq!(start.attributes_raw(), b"");

        // The attribute is stored with a leading space and quoted value
        start.push_attribute(("x", "a"));
        assert_eq!(start.len(), 10);
        assert_eq!(start.attributes_raw(), b" x=\"a\"");

        // Only the name part shrinks: 1 byte of name + 6 bytes of attributes
        start.set_name(b"g");
        assert_eq!(start.len(), 7);
        assert_eq!(start.name(), QName(b"g"));
    }

    /// Clearing attributes leaves only the tag name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut start = BytesStart::new("test");
        start.push_attribute(("x", "y\"z"));
        start.push_attribute(("x", "y\"z"));
        start.clear_attributes();
        assert!(start.attributes().next().is_none());
        assert_eq!(start.len(), 4);
        assert_eq!(start.name(), QName(b"test"));
    }
}