quick_xml/events/
mod.rs

1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{escape, minimal_escape, parse_number, partial_escape, EscapeError};
52use crate::name::{LocalName, QName};
53use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
54use attributes::{AttrError, Attribute, Attributes};
55
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<` and `>` or `/>`:
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <element a1 = 'val1' a2=\"val2\" />\
///     <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by the raw attributes),
    /// before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes; the name occupies `buf[..name_len]`
    /// and everything after it is the raw attribute part
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
97
98impl<'a> BytesStart<'a> {
99    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
100    #[inline]
101    pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
102        BytesStart {
103            buf: Cow::Borrowed(content),
104            name_len,
105            decoder,
106        }
107    }
108
109    /// Creates a new `BytesStart` from the given name.
110    ///
111    /// # Warning
112    ///
113    /// `name` must be a valid name.
114    #[inline]
115    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
116        let buf = str_cow_to_bytes(name);
117        BytesStart {
118            name_len: buf.len(),
119            buf,
120            decoder: Decoder::utf8(),
121        }
122    }
123
124    /// Creates a new `BytesStart` from the given content (name + attributes).
125    ///
126    /// # Warning
127    ///
128    /// `&content[..name_len]` must be a valid name, and the remainder of `content`
129    /// must be correctly-formed attributes. Neither are checked, it is possible
130    /// to generate invalid XML if `content` or `name_len` are incorrect.
131    #[inline]
132    pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
133        BytesStart {
134            buf: str_cow_to_bytes(content),
135            name_len,
136            decoder: Decoder::utf8(),
137        }
138    }
139
140    /// Converts the event into an owned event.
141    pub fn into_owned(self) -> BytesStart<'static> {
142        BytesStart {
143            buf: Cow::Owned(self.buf.into_owned()),
144            name_len: self.name_len,
145            decoder: self.decoder,
146        }
147    }
148
149    /// Converts the event into an owned event without taking ownership of Event
150    pub fn to_owned(&self) -> BytesStart<'static> {
151        BytesStart {
152            buf: Cow::Owned(self.buf.clone().into_owned()),
153            name_len: self.name_len,
154            decoder: self.decoder,
155        }
156    }
157
158    /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
159    ///
160    /// # Example
161    ///
162    /// ```
163    /// use quick_xml::events::{BytesStart, Event};
164    /// # use quick_xml::writer::Writer;
165    /// # use quick_xml::Error;
166    ///
167    /// struct SomeStruct<'a> {
168    ///     attrs: BytesStart<'a>,
169    ///     // ...
170    /// }
171    /// # impl<'a> SomeStruct<'a> {
172    /// # fn example(&self) -> Result<(), Error> {
173    /// # let mut writer = Writer::new(Vec::new());
174    ///
175    /// writer.write_event(Event::Start(self.attrs.borrow()))?;
176    /// // ...
177    /// writer.write_event(Event::End(self.attrs.to_end()))?;
178    /// # Ok(())
179    /// # }}
180    /// ```
181    ///
182    /// [`to_end`]: Self::to_end
183    pub fn borrow(&self) -> BytesStart {
184        BytesStart {
185            buf: Cow::Borrowed(&self.buf),
186            name_len: self.name_len,
187            decoder: self.decoder,
188        }
189    }
190
191    /// Creates new paired close tag
192    #[inline]
193    pub fn to_end(&self) -> BytesEnd {
194        BytesEnd::from(self.name())
195    }
196
197    /// Get the decoder, used to decode bytes, read by the reader which produces
198    /// this event, to the strings.
199    ///
200    /// When event was created manually, encoding is UTF-8.
201    ///
202    /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
203    /// defaults to UTF-8.
204    ///
205    /// [`encoding`]: ../index.html#encoding
206    #[inline]
207    pub const fn decoder(&self) -> Decoder {
208        self.decoder
209    }
210
211    /// Gets the undecoded raw tag name, as present in the input stream.
212    #[inline]
213    pub fn name(&self) -> QName {
214        QName(&self.buf[..self.name_len])
215    }
216
217    /// Gets the undecoded raw local tag name (excluding namespace) as present
218    /// in the input stream.
219    ///
220    /// All content up to and including the first `:` character is removed from the tag name.
221    #[inline]
222    pub fn local_name(&self) -> LocalName {
223        self.name().into()
224    }
225
226    /// Edit the name of the BytesStart in-place
227    ///
228    /// # Warning
229    ///
230    /// `name` must be a valid name.
231    pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
232        let bytes = self.buf.to_mut();
233        bytes.splice(..self.name_len, name.iter().cloned());
234        self.name_len = name.len();
235        self
236    }
237}
238
239/// Attribute-related methods
240impl<'a> BytesStart<'a> {
241    /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
242    ///
243    /// The yielded items must be convertible to [`Attribute`] using `Into`.
244    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
245    where
246        I: IntoIterator,
247        I::Item: Into<Attribute<'b>>,
248    {
249        self.extend_attributes(attributes);
250        self
251    }
252
253    /// Add additional attributes to this tag using an iterator.
254    ///
255    /// The yielded items must be convertible to [`Attribute`] using `Into`.
256    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
257    where
258        I: IntoIterator,
259        I::Item: Into<Attribute<'b>>,
260    {
261        for attr in attributes {
262            self.push_attribute(attr);
263        }
264        self
265    }
266
267    /// Adds an attribute to this element.
268    pub fn push_attribute<'b, A>(&mut self, attr: A)
269    where
270        A: Into<Attribute<'b>>,
271    {
272        self.buf.to_mut().push(b' ');
273        self.push_attr(attr.into());
274    }
275
276    /// Remove all attributes from the ByteStart
277    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
278        self.buf.to_mut().truncate(self.name_len);
279        self
280    }
281
282    /// Returns an iterator over the attributes of this tag.
283    pub fn attributes(&self) -> Attributes {
284        Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
285    }
286
287    /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
288    pub fn html_attributes(&self) -> Attributes {
289        Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
290    }
291
292    /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
293    /// including the whitespace after the tag name if there is any.
294    #[inline]
295    pub fn attributes_raw(&self) -> &[u8] {
296        &self.buf[self.name_len..]
297    }
298
299    /// Try to get an attribute
300    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
301        &'a self,
302        attr_name: N,
303    ) -> Result<Option<Attribute<'a>>, AttrError> {
304        for a in self.attributes().with_checks(false) {
305            let a = a?;
306            if a.key.as_ref() == attr_name.as_ref() {
307                return Ok(Some(a));
308            }
309        }
310        Ok(None)
311    }
312
313    /// Adds an attribute to this element.
314    pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
315        let bytes = self.buf.to_mut();
316        bytes.extend_from_slice(attr.key.as_ref());
317        bytes.extend_from_slice(b"=\"");
318        // FIXME: need to escape attribute content
319        bytes.extend_from_slice(attr.value.as_ref());
320        bytes.push(b'"');
321    }
322
323    /// Adds new line in existing element
324    pub(crate) fn push_newline(&mut self) {
325        self.buf.to_mut().push(b'\n');
326    }
327
328    /// Adds indentation bytes in existing element
329    pub(crate) fn push_indent(&mut self, indent: &[u8]) {
330        self.buf.to_mut().extend_from_slice(indent);
331    }
332}
333
334impl<'a> Debug for BytesStart<'a> {
335    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
336        write!(f, "BytesStart {{ buf: ")?;
337        write_cow_string(f, &self.buf)?;
338        write!(f, ", name_len: {} }}", self.name_len)
339    }
340}
341
342impl<'a> Deref for BytesStart<'a> {
343    type Target = [u8];
344
345    fn deref(&self) -> &[u8] {
346        &self.buf
347    }
348}
349
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Builds a start tag from an arbitrary name and an arbitrary list of
    /// `(key, value)` attribute pairs.
    ///
    /// Names that are empty or contain a non-alphanumeric character are
    /// rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `extend_attributes` accepts any `IntoIterator`, so the `Vec` is passed directly
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
366
367////////////////////////////////////////////////////////////////////////////////////////////////////
368
/// Closing tag data (`Event::End`): `</name>`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the closing tag; the whole content is treated as the name
    name: Cow<'a, [u8]>,
}
405
406impl<'a> BytesEnd<'a> {
407    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
408    #[inline]
409    pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
410        BytesEnd { name }
411    }
412
413    /// Creates a new `BytesEnd` borrowing a slice.
414    ///
415    /// # Warning
416    ///
417    /// `name` must be a valid name.
418    #[inline]
419    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
420        Self::wrap(str_cow_to_bytes(name))
421    }
422
423    /// Converts the event into an owned event.
424    pub fn into_owned(self) -> BytesEnd<'static> {
425        BytesEnd {
426            name: Cow::Owned(self.name.into_owned()),
427        }
428    }
429
430    /// Converts the event into a borrowed event.
431    #[inline]
432    pub fn borrow(&self) -> BytesEnd {
433        BytesEnd {
434            name: Cow::Borrowed(&self.name),
435        }
436    }
437
438    /// Gets the undecoded raw tag name, as present in the input stream.
439    #[inline]
440    pub fn name(&self) -> QName {
441        QName(&self.name)
442    }
443
444    /// Gets the undecoded raw local tag name (excluding namespace) as present
445    /// in the input stream.
446    ///
447    /// All content up to and including the first `:` character is removed from the tag name.
448    #[inline]
449    pub fn local_name(&self) -> LocalName {
450        self.name().into()
451    }
452}
453
454impl<'a> Debug for BytesEnd<'a> {
455    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
456        write!(f, "BytesEnd {{ name: ")?;
457        write_cow_string(f, &self.name)?;
458        write!(f, " }}")
459    }
460}
461
462impl<'a> Deref for BytesEnd<'a> {
463    type Target = [u8];
464
465    fn deref(&self) -> &[u8] {
466        &self.name
467    }
468}
469
470impl<'a> From<QName<'a>> for BytesEnd<'a> {
471    #[inline]
472    fn from(name: QName<'a>) -> Self {
473        Self::wrap(name.into_inner().into())
474    }
475}
476
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Builds a closing tag from an arbitrary string; the string is used as the name as is.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
486
487////////////////////////////////////////////////////////////////////////////////////////////////////
488
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally data is stored in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of comment this is everything
/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
/// (i.e. in case of DTD the first character is never space):
///
/// ```
/// # use quick_xml::events::{BytesText, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <!DOCTYPE comment or text >\
///     comment or text \
///     <!--comment or text -->"
/// );
/// let content = "comment or text ";
/// let event = BytesText::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
/// // deref coercion of &BytesText to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
529
530impl<'a> BytesText<'a> {
531    /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
532    #[inline]
533    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
534        Self {
535            content: content.into(),
536            decoder,
537        }
538    }
539
540    /// Creates a new `BytesText` from an escaped string.
541    #[inline]
542    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
543        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
544    }
545
546    /// Creates a new `BytesText` from a string. The string is expected not to
547    /// be escaped.
548    #[inline]
549    pub fn new(content: &'a str) -> Self {
550        Self::from_escaped(escape(content))
551    }
552
553    /// Ensures that all data is owned to extend the object's lifetime if
554    /// necessary.
555    #[inline]
556    pub fn into_owned(self) -> BytesText<'static> {
557        BytesText {
558            content: self.content.into_owned().into(),
559            decoder: self.decoder,
560        }
561    }
562
563    /// Extracts the inner `Cow` from the `BytesText` event container.
564    #[inline]
565    pub fn into_inner(self) -> Cow<'a, [u8]> {
566        self.content
567    }
568
569    /// Converts the event into a borrowed event.
570    #[inline]
571    pub fn borrow(&self) -> BytesText {
572        BytesText {
573            content: Cow::Borrowed(&self.content),
574            decoder: self.decoder,
575        }
576    }
577
578    /// Decodes the content of the event.
579    ///
580    /// This will allocate if the value contains any escape sequences or in
581    /// non-UTF-8 encoding.
582    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
583        self.decoder.decode_cow(&self.content)
584    }
585
586    /// Removes leading XML whitespace bytes from text content.
587    ///
588    /// Returns `true` if content is empty after that
589    pub fn inplace_trim_start(&mut self) -> bool {
590        self.content = trim_cow(
591            replace(&mut self.content, Cow::Borrowed(b"")),
592            trim_xml_start,
593        );
594        self.content.is_empty()
595    }
596
597    /// Removes trailing XML whitespace bytes from text content.
598    ///
599    /// Returns `true` if content is empty after that
600    pub fn inplace_trim_end(&mut self) -> bool {
601        self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
602        self.content.is_empty()
603    }
604}
605
606impl<'a> Debug for BytesText<'a> {
607    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
608        write!(f, "BytesText {{ content: ")?;
609        write_cow_string(f, &self.content)?;
610        write!(f, " }}")
611    }
612}
613
614impl<'a> Deref for BytesText<'a> {
615    type Target = [u8];
616
617    fn deref(&self) -> &[u8] {
618        &self.content
619    }
620}
621
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Builds a text event from an arbitrary string.
    ///
    /// Strings containing a non-alphanumeric character are rejected with
    /// `IncorrectFormat`; the empty string is accepted.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
636
637////////////////////////////////////////////////////////////////////////////////////////////////////
638
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
/// [convert](Self::escape) it to [`BytesText`].
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<![CDATA[` and `]]>`.
///
/// Note, that inner text will not contain `]]>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw, unescaped content of the section, stored in the encoding described by `decoder`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
667
668impl<'a> BytesCData<'a> {
669    /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
670    #[inline]
671    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
672        Self {
673            content: content.into(),
674            decoder,
675        }
676    }
677
678    /// Creates a new `BytesCData` from a string.
679    ///
680    /// # Warning
681    ///
682    /// `content` must not contain the `]]>` sequence. You can use
683    /// [`BytesCData::escaped`] to escape the content instead.
684    #[inline]
685    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
686        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
687    }
688
689    /// Creates an iterator of `BytesCData` from a string.
690    ///
691    /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
692    /// sections, splitting the `]]` and `>` characters, because the CDATA closing
693    /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
694    /// for each of those sections.
695    ///
696    /// # Examples
697    ///
698    /// ```
699    /// # use quick_xml::events::BytesCData;
700    /// # use pretty_assertions::assert_eq;
701    /// let content = "";
702    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
703    /// assert_eq!(cdata, &[BytesCData::new("")]);
704    ///
705    /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
706    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
707    /// assert_eq!(cdata, &[
708    ///     BytesCData::new("Certain tokens like ]]"),
709    ///     BytesCData::new("> can be difficult and <invalid>"),
710    /// ]);
711    ///
712    /// let content = "foo]]>bar]]>baz]]>quux";
713    /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
714    /// assert_eq!(cdata, &[
715    ///     BytesCData::new("foo]]"),
716    ///     BytesCData::new(">bar]]"),
717    ///     BytesCData::new(">baz]]"),
718    ///     BytesCData::new(">quux"),
719    /// ]);
720    /// ```
721    #[inline]
722    pub fn escaped(content: &'a str) -> CDataIterator<'a> {
723        CDataIterator {
724            unprocessed: content.as_bytes(),
725            finished: false,
726        }
727    }
728
729    /// Ensures that all data is owned to extend the object's lifetime if
730    /// necessary.
731    #[inline]
732    pub fn into_owned(self) -> BytesCData<'static> {
733        BytesCData {
734            content: self.content.into_owned().into(),
735            decoder: self.decoder,
736        }
737    }
738
739    /// Extracts the inner `Cow` from the `BytesCData` event container.
740    #[inline]
741    pub fn into_inner(self) -> Cow<'a, [u8]> {
742        self.content
743    }
744
745    /// Converts the event into a borrowed event.
746    #[inline]
747    pub fn borrow(&self) -> BytesCData {
748        BytesCData {
749            content: Cow::Borrowed(&self.content),
750            decoder: self.decoder,
751        }
752    }
753
754    /// Converts this CDATA content to an escaped version, that can be written
755    /// as an usual text in XML.
756    ///
757    /// This function performs following replacements:
758    ///
759    /// | Character | Replacement
760    /// |-----------|------------
761    /// | `<`       | `&lt;`
762    /// | `>`       | `&gt;`
763    /// | `&`       | `&amp;`
764    /// | `'`       | `&apos;`
765    /// | `"`       | `&quot;`
766    pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
767        let decoded = self.decode()?;
768        Ok(BytesText::wrap(
769            match escape(decoded) {
770                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
771                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
772            },
773            Decoder::utf8(),
774        ))
775    }
776
777    /// Converts this CDATA content to an escaped version, that can be written
778    /// as an usual text in XML.
779    ///
780    /// In XML text content, it is allowed (though not recommended) to leave
781    /// the quote special characters `"` and `'` unescaped.
782    ///
783    /// This function performs following replacements:
784    ///
785    /// | Character | Replacement
786    /// |-----------|------------
787    /// | `<`       | `&lt;`
788    /// | `>`       | `&gt;`
789    /// | `&`       | `&amp;`
790    pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
791        let decoded = self.decode()?;
792        Ok(BytesText::wrap(
793            match partial_escape(decoded) {
794                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
795                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
796            },
797            Decoder::utf8(),
798        ))
799    }
800
801    /// Converts this CDATA content to an escaped version, that can be written
802    /// as an usual text in XML. This method escapes only those characters that
803    /// must be escaped according to the [specification].
804    ///
805    /// This function performs following replacements:
806    ///
807    /// | Character | Replacement
808    /// |-----------|------------
809    /// | `<`       | `&lt;`
810    /// | `&`       | `&amp;`
811    ///
812    /// [specification]: https://www.w3.org/TR/xml11/#syntax
813    pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
814        let decoded = self.decode()?;
815        Ok(BytesText::wrap(
816            match minimal_escape(decoded) {
817                Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
818                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
819            },
820            Decoder::utf8(),
821        ))
822    }
823
824    /// Decodes the raw input byte content of the CDATA section into a string,
825    /// without performing XML entity escaping.
826    ///
827    /// When this event produced by the XML reader, it uses the encoding information
828    /// associated with that reader to interpret the raw bytes contained within this
829    /// CDATA event.
830    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
831        Ok(self.decoder.decode_cow(&self.content)?)
832    }
833}
834
835impl<'a> Debug for BytesCData<'a> {
836    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
837        write!(f, "BytesCData {{ content: ")?;
838        write_cow_string(f, &self.content)?;
839        write!(f, " }}")
840    }
841}
842
843impl<'a> Deref for BytesCData<'a> {
844    type Target = [u8];
845
846    fn deref(&self) -> &[u8] {
847        &self.content
848    }
849}
850
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds a CDATA event from an arbitrary string; the string is used as the content as is.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
860
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set once the last section (the remainder that contains no further `]]>`)
    /// has been returned; from then on the iterator yields `None`.
    finished: bool,
}
871
872impl<'a> Debug for CDataIterator<'a> {
873    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
874        f.debug_struct("CDataIterator")
875            .field("unprocessed", &Bytes(self.unprocessed))
876            .field("finished", &self.finished)
877            .finish()
878    }
879}
880
881impl<'a> Iterator for CDataIterator<'a> {
882    type Item = BytesCData<'a>;
883
884    fn next(&mut self) -> Option<BytesCData<'a>> {
885        if self.finished {
886            return None;
887        }
888
889        for gt in memchr::memchr_iter(b'>', self.unprocessed) {
890            if self.unprocessed[..gt].ends_with(b"]]") {
891                let (slice, rest) = self.unprocessed.split_at(gt);
892                self.unprocessed = rest;
893                return Some(BytesCData::wrap(slice, Decoder::utf8()));
894            }
895        }
896
897        self.finished = true;
898        Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
899    }
900}
901
// Once the `finished` flag is set, `next()` returns `None` on every call,
// which is the guarantee required by `FusedIterator`.
impl FusedIterator for CDataIterator<'_> {}
903
904////////////////////////////////////////////////////////////////////////////////////////////////////
905
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// The whole instruction, stored as a `BytesStart`: the target is read
    /// back as its name and the rest of the instruction as its raw attributes.
    content: BytesStart<'a>,
}
933
934impl<'a> BytesPI<'a> {
935    /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
936    #[inline]
937    pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
938        Self {
939            content: BytesStart::wrap(content, target_len, decoder),
940        }
941    }
942
943    /// Creates a new `BytesPI` from a string.
944    ///
945    /// # Warning
946    ///
947    /// `content` must not contain the `?>` sequence.
948    #[inline]
949    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
950        let buf = str_cow_to_bytes(content);
951        let name_len = name_len(&buf);
952        Self {
953            content: BytesStart {
954                buf,
955                name_len,
956                decoder: Decoder::utf8(),
957            },
958        }
959    }
960
961    /// Ensures that all data is owned to extend the object's lifetime if
962    /// necessary.
963    #[inline]
964    pub fn into_owned(self) -> BytesPI<'static> {
965        BytesPI {
966            content: self.content.into_owned().into(),
967        }
968    }
969
970    /// Extracts the inner `Cow` from the `BytesPI` event container.
971    #[inline]
972    pub fn into_inner(self) -> Cow<'a, [u8]> {
973        self.content.buf
974    }
975
976    /// Converts the event into a borrowed event.
977    #[inline]
978    pub fn borrow(&self) -> BytesPI {
979        BytesPI {
980            content: self.content.borrow(),
981        }
982    }
983
984    /// A target used to identify the application to which the instruction is directed.
985    ///
986    /// # Example
987    ///
988    /// ```
989    /// # use pretty_assertions::assert_eq;
990    /// use quick_xml::events::BytesPI;
991    ///
992    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
993    /// assert_eq!(instruction.target(), b"xml-stylesheet");
994    /// ```
995    #[inline]
996    pub fn target(&self) -> &[u8] {
997        self.content.name().0
998    }
999
1000    /// Content of the processing instruction. Contains everything between target
1001    /// name and the end of the instruction. A direct consequence is that the first
1002    /// character is always a space character.
1003    ///
1004    /// # Example
1005    ///
1006    /// ```
1007    /// # use pretty_assertions::assert_eq;
1008    /// use quick_xml::events::BytesPI;
1009    ///
1010    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1011    /// assert_eq!(instruction.content(), br#" href="style.css""#);
1012    /// ```
1013    #[inline]
1014    pub fn content(&self) -> &[u8] {
1015        self.content.attributes_raw()
1016    }
1017
1018    /// A view of the processing instructions' content as a list of key-value pairs.
1019    ///
1020    /// Key-value pairs are used in some processing instructions, for example in
1021    /// `<?xml-stylesheet?>`.
1022    ///
1023    /// Returned iterator does not validate attribute values as may required by
1024    /// target's rules. For example, it doesn't check that substring `?>` is not
1025    /// present in the attribute value. That shouldn't be the problem when event
1026    /// is produced by the reader, because reader detects end of processing instruction
1027    /// by the first `?>` sequence, as required by the specification, and therefore
1028    /// this sequence cannot appear inside it.
1029    ///
1030    /// # Example
1031    ///
1032    /// ```
1033    /// # use pretty_assertions::assert_eq;
1034    /// use std::borrow::Cow;
1035    /// use quick_xml::events::attributes::Attribute;
1036    /// use quick_xml::events::BytesPI;
1037    /// use quick_xml::name::QName;
1038    ///
1039    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1040    /// for attr in instruction.attributes() {
1041    ///     assert_eq!(attr, Ok(Attribute {
1042    ///         key: QName(b"href"),
1043    ///         value: Cow::Borrowed(b"style.css"),
1044    ///     }));
1045    /// }
1046    /// ```
1047    #[inline]
1048    pub fn attributes(&self) -> Attributes {
1049        self.content.attributes()
1050    }
1051}
1052
1053impl<'a> Debug for BytesPI<'a> {
1054    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1055        write!(f, "BytesPI {{ content: ")?;
1056        write_cow_string(f, &self.content.buf)?;
1057        write!(f, " }}")
1058    }
1059}
1060
1061impl<'a> Deref for BytesPI<'a> {
1062    type Target = [u8];
1063
1064    fn deref(&self) -> &[u8] {
1065        &self.content
1066    }
1067}
1068
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds a processing instruction from an arbitrary string taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// Forwards the size hint of the `&str` the event is built from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1078
1079////////////////////////////////////////////////////////////////////////////////////////////////////
1080
/// An XML declaration (`Event::Decl`).
///
/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note that the inner text will not contain the `?>` sequence:
///
/// ```
/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
/// let content = "xml version = '1.0' ";
/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
///
/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
/// // deref coercion of &BytesDecl to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
    /// The whole declaration, stored as a `BytesStart`; `version`, `encoding`
    /// and `standalone` are looked up among its attributes.
    content: BytesStart<'a>,
}
1108
1109impl<'a> BytesDecl<'a> {
1110    /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1111    /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1112    /// attribute.
1113    ///
1114    /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1115    /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1116    /// the double quote character is not allowed in any of the attribute values.
1117    pub fn new(
1118        version: &str,
1119        encoding: Option<&str>,
1120        standalone: Option<&str>,
1121    ) -> BytesDecl<'static> {
1122        // Compute length of the buffer based on supplied attributes
1123        // ' encoding=""'   => 12
1124        let encoding_attr_len = if let Some(xs) = encoding {
1125            12 + xs.len()
1126        } else {
1127            0
1128        };
1129        // ' standalone=""' => 14
1130        let standalone_attr_len = if let Some(xs) = standalone {
1131            14 + xs.len()
1132        } else {
1133            0
1134        };
1135        // 'xml version=""' => 14
1136        let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1137
1138        buf.push_str("xml version=\"");
1139        buf.push_str(version);
1140
1141        if let Some(encoding_val) = encoding {
1142            buf.push_str("\" encoding=\"");
1143            buf.push_str(encoding_val);
1144        }
1145
1146        if let Some(standalone_val) = standalone {
1147            buf.push_str("\" standalone=\"");
1148            buf.push_str(standalone_val);
1149        }
1150        buf.push('"');
1151
1152        BytesDecl {
1153            content: BytesStart::from_content(buf, 3),
1154        }
1155    }
1156
1157    /// Creates a `BytesDecl` from a `BytesStart`
1158    pub const fn from_start(start: BytesStart<'a>) -> Self {
1159        Self { content: start }
1160    }
1161
1162    /// Gets xml version, excluding quotes (`'` or `"`).
1163    ///
1164    /// According to the [grammar], the version *must* be the first thing in the declaration.
1165    /// This method tries to extract the first thing in the declaration and return it.
1166    /// In case of multiple attributes value of the first one is returned.
1167    ///
1168    /// If version is missed in the declaration, or the first thing is not a version,
1169    /// [`IllFormedError::MissingDeclVersion`] will be returned.
1170    ///
1171    /// # Examples
1172    ///
1173    /// ```
1174    /// use quick_xml::errors::{Error, IllFormedError};
1175    /// use quick_xml::events::{BytesDecl, BytesStart};
1176    ///
1177    /// // <?xml version='1.1'?>
1178    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1179    /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1180    ///
1181    /// // <?xml version='1.0' version='1.1'?>
1182    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1183    /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1184    ///
1185    /// // <?xml encoding='utf-8'?>
1186    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1187    /// match decl.version() {
1188    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1189    ///     _ => assert!(false),
1190    /// }
1191    ///
1192    /// // <?xml encoding='utf-8' version='1.1'?>
1193    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1194    /// match decl.version() {
1195    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1196    ///     _ => assert!(false),
1197    /// }
1198    ///
1199    /// // <?xml?>
1200    /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1201    /// match decl.version() {
1202    ///     Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1203    ///     _ => assert!(false),
1204    /// }
1205    /// ```
1206    ///
1207    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1208    pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1209        // The version *must* be the first thing in the declaration.
1210        match self.content.attributes().with_checks(false).next() {
1211            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1212            // first attribute was not "version"
1213            Some(Ok(a)) => {
1214                let found = from_utf8(a.key.as_ref())
1215                    .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1216                    .to_string();
1217                Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1218                    found,
1219                ))))
1220            }
1221            // error parsing attributes
1222            Some(Err(e)) => Err(e.into()),
1223            // no attributes
1224            None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1225        }
1226    }
1227
1228    /// Gets xml encoding, excluding quotes (`'` or `"`).
1229    ///
1230    /// Although according to the [grammar] encoding must appear before `"standalone"`
1231    /// and after `"version"`, this method does not check that. The first occurrence
1232    /// of the attribute will be returned even if there are several. Also, method does
1233    /// not restrict symbols that can forming the encoding, so the returned encoding
1234    /// name may not correspond to the grammar.
1235    ///
1236    /// # Examples
1237    ///
1238    /// ```
1239    /// use std::borrow::Cow;
1240    /// use quick_xml::Error;
1241    /// use quick_xml::events::{BytesDecl, BytesStart};
1242    ///
1243    /// // <?xml version='1.1'?>
1244    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1245    /// assert!(decl.encoding().is_none());
1246    ///
1247    /// // <?xml encoding='utf-8'?>
1248    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1249    /// match decl.encoding() {
1250    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1251    ///     _ => assert!(false),
1252    /// }
1253    ///
1254    /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1255    /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1256    /// match decl.encoding() {
1257    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1258    ///     _ => assert!(false),
1259    /// }
1260    /// ```
1261    ///
1262    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1263    pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1264        self.content
1265            .try_get_attribute("encoding")
1266            .map(|a| a.map(|a| a.value))
1267            .transpose()
1268    }
1269
1270    /// Gets xml standalone, excluding quotes (`'` or `"`).
1271    ///
1272    /// Although according to the [grammar] standalone flag must appear after `"version"`
1273    /// and `"encoding"`, this method does not check that. The first occurrence of the
1274    /// attribute will be returned even if there are several. Also, method does not
1275    /// restrict symbols that can forming the value, so the returned flag name may not
1276    /// correspond to the grammar.
1277    ///
1278    /// # Examples
1279    ///
1280    /// ```
1281    /// use std::borrow::Cow;
1282    /// use quick_xml::Error;
1283    /// use quick_xml::events::{BytesDecl, BytesStart};
1284    ///
1285    /// // <?xml version='1.1'?>
1286    /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1287    /// assert!(decl.standalone().is_none());
1288    ///
1289    /// // <?xml standalone='yes'?>
1290    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1291    /// match decl.standalone() {
1292    ///     Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1293    ///     _ => assert!(false),
1294    /// }
1295    ///
1296    /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1297    /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1298    /// match decl.standalone() {
1299    ///     Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1300    ///     _ => assert!(false),
1301    /// }
1302    /// ```
1303    ///
1304    /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1305    pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1306        self.content
1307            .try_get_attribute("standalone")
1308            .map(|a| a.map(|a| a.value))
1309            .transpose()
1310    }
1311
1312    /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1313    /// algorithm.
1314    ///
1315    /// If encoding in not known, or `encoding` key was not found, returns `None`.
1316    /// In case of duplicated `encoding` key, encoding, corresponding to the first
1317    /// one, is returned.
1318    #[cfg(feature = "encoding")]
1319    pub fn encoder(&self) -> Option<&'static Encoding> {
1320        self.encoding()
1321            .and_then(|e| e.ok())
1322            .and_then(|e| Encoding::for_label(&e))
1323    }
1324
1325    /// Converts the event into an owned event.
1326    pub fn into_owned(self) -> BytesDecl<'static> {
1327        BytesDecl {
1328            content: self.content.into_owned(),
1329        }
1330    }
1331
1332    /// Converts the event into a borrowed event.
1333    #[inline]
1334    pub fn borrow(&self) -> BytesDecl {
1335        BytesDecl {
1336            content: self.content.borrow(),
1337        }
1338    }
1339}
1340
1341impl<'a> Deref for BytesDecl<'a> {
1342    type Target = [u8];
1343
1344    fn deref(&self) -> &[u8] {
1345        &self.content
1346    }
1347}
1348
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version, optional encoding and
    /// optional standalone flag, taken from `u` in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Forwards the size hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1363
1364////////////////////////////////////////////////////////////////////////////////////////////////////
1365
/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `&` and `;`:
///
/// ```
/// # use quick_xml::events::{BytesRef, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"&entity;"#);
/// let content = "entity";
/// let event = BytesRef::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
/// // deref coercion of &BytesRef to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesRef<'a> {
    /// Content of the reference between `&` and `;`. A leading `#` marks
    /// a character reference.
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event.
    decoder: Decoder,
}
1391
1392impl<'a> BytesRef<'a> {
1393    /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1394    #[inline]
1395    pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1396        Self {
1397            content: Cow::Borrowed(content),
1398            decoder,
1399        }
1400    }
1401
1402    /// Creates a new `BytesRef` borrowing a slice.
1403    ///
1404    /// # Warning
1405    ///
1406    /// `name` must be a valid name.
1407    #[inline]
1408    pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1409        Self {
1410            content: str_cow_to_bytes(name),
1411            decoder: Decoder::utf8(),
1412        }
1413    }
1414
1415    /// Converts the event into an owned event.
1416    pub fn into_owned(self) -> BytesRef<'static> {
1417        BytesRef {
1418            content: Cow::Owned(self.content.into_owned()),
1419            decoder: self.decoder,
1420        }
1421    }
1422
1423    /// Extracts the inner `Cow` from the `BytesRef` event container.
1424    #[inline]
1425    pub fn into_inner(self) -> Cow<'a, [u8]> {
1426        self.content
1427    }
1428
1429    /// Converts the event into a borrowed event.
1430    #[inline]
1431    pub fn borrow(&self) -> BytesRef {
1432        BytesRef {
1433            content: Cow::Borrowed(&self.content),
1434            decoder: self.decoder,
1435        }
1436    }
1437
1438    /// Decodes the content of the event.
1439    ///
1440    /// This will allocate if the value contains any escape sequences or in
1441    /// non-UTF-8 encoding.
1442    pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1443        self.decoder.decode_cow(&self.content)
1444    }
1445
1446    /// Returns `true` if the specified reference represents the character reference
1447    /// (`&#<number>;`).
1448    ///
1449    /// ```
1450    /// # use quick_xml::events::BytesRef;
1451    /// # use pretty_assertions::assert_eq;
1452    /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1453    /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1454    /// assert_eq!(BytesRef::new("lt"  ).is_char_ref(), false);
1455    /// ```
1456    pub fn is_char_ref(&self) -> bool {
1457        matches!(self.content.first(), Some(b'#'))
1458    }
1459
1460    /// If this reference represents character reference, then resolves it and
1461    /// returns the character, otherwise returns `None`.
1462    ///
1463    /// This method does not check if character is allowed for XML, in other words,
1464    /// well-formedness constraint [WFC: Legal Char] is not enforced.
1465    /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1466    ///
1467    /// ```
1468    /// # use quick_xml::events::BytesRef;
1469    /// # use pretty_assertions::assert_eq;
1470    /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1471    /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1472    /// assert_eq!(BytesRef::new("lt"  ).resolve_char_ref().unwrap(), None);
1473    /// ```
1474    ///
1475    /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1476    pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1477        if let Some(num) = self.decode()?.strip_prefix('#') {
1478            let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1479            return Ok(Some(ch));
1480        }
1481        Ok(None)
1482    }
1483}
1484
1485impl<'a> Debug for BytesRef<'a> {
1486    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1487        write!(f, "BytesRef {{ content: ")?;
1488        write_cow_string(f, &self.content)?;
1489        write!(f, " }}")
1490    }
1491}
1492
1493impl<'a> Deref for BytesRef<'a> {
1494    type Target = [u8];
1495
1496    fn deref(&self) -> &[u8] {
1497        &self.content
1498    }
1499}
1500
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Builds a reference from an arbitrary string taken from `u`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Forwards the size hint of the `&str` the event is built from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        let hint = <&str as arbitrary::Arbitrary>::size_hint(depth);
        hint
    }
}
1511
1512////////////////////////////////////////////////////////////////////////////////////////////////////
1513
/// Event emitted by [`Reader::read_event_into`].
///
/// This type implements `Deref<Target = [u8]>`, which returns the bytes
/// of the wrapped event, or an empty slice for [`Event::Eof`].
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
    /// Start tag (with attributes) `<tag attr="value">`.
    Start(BytesStart<'a>),
    /// End tag `</tag>`.
    End(BytesEnd<'a>),
    /// Empty element tag (with attributes) `<tag attr="value" />`.
    Empty(BytesStart<'a>),
    /// Escaped character data between tags.
    Text(BytesText<'a>),
    /// Unescaped character data stored in `<![CDATA[...]]>`.
    CData(BytesCData<'a>),
    /// Comment `<!-- ... -->`.
    Comment(BytesText<'a>),
    /// XML declaration `<?xml ...?>`.
    Decl(BytesDecl<'a>),
    /// Processing instruction `<?...?>`.
    PI(BytesPI<'a>),
    /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
    DocType(BytesText<'a>),
    /// General reference `&entity;` in the textual data. Can be either an entity
    /// reference, or a character reference.
    GeneralRef(BytesRef<'a>),
    /// End of XML document. This variant carries no data.
    Eof,
}
1544
1545impl<'a> Event<'a> {
1546    /// Converts the event to an owned version, untied to the lifetime of
1547    /// buffer used when reading but incurring a new, separate allocation.
1548    pub fn into_owned(self) -> Event<'static> {
1549        match self {
1550            Event::Start(e) => Event::Start(e.into_owned()),
1551            Event::End(e) => Event::End(e.into_owned()),
1552            Event::Empty(e) => Event::Empty(e.into_owned()),
1553            Event::Text(e) => Event::Text(e.into_owned()),
1554            Event::Comment(e) => Event::Comment(e.into_owned()),
1555            Event::CData(e) => Event::CData(e.into_owned()),
1556            Event::Decl(e) => Event::Decl(e.into_owned()),
1557            Event::PI(e) => Event::PI(e.into_owned()),
1558            Event::DocType(e) => Event::DocType(e.into_owned()),
1559            Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1560            Event::Eof => Event::Eof,
1561        }
1562    }
1563
1564    /// Converts the event into a borrowed event.
1565    #[inline]
1566    pub fn borrow(&self) -> Event {
1567        match self {
1568            Event::Start(e) => Event::Start(e.borrow()),
1569            Event::End(e) => Event::End(e.borrow()),
1570            Event::Empty(e) => Event::Empty(e.borrow()),
1571            Event::Text(e) => Event::Text(e.borrow()),
1572            Event::Comment(e) => Event::Comment(e.borrow()),
1573            Event::CData(e) => Event::CData(e.borrow()),
1574            Event::Decl(e) => Event::Decl(e.borrow()),
1575            Event::PI(e) => Event::PI(e.borrow()),
1576            Event::DocType(e) => Event::DocType(e.borrow()),
1577            Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1578            Event::Eof => Event::Eof,
1579        }
1580    }
1581}
1582
1583impl<'a> Deref for Event<'a> {
1584    type Target = [u8];
1585
1586    fn deref(&self) -> &[u8] {
1587        match *self {
1588            Event::Start(ref e) | Event::Empty(ref e) => e,
1589            Event::End(ref e) => e,
1590            Event::Text(ref e) => e,
1591            Event::Decl(ref e) => e,
1592            Event::PI(ref e) => e,
1593            Event::CData(ref e) => e,
1594            Event::Comment(ref e) => e,
1595            Event::DocType(ref e) => e,
1596            Event::GeneralRef(ref e) => e,
1597            Event::Eof => &[],
1598        }
1599    }
1600}
1601
/// Identity conversion: a reference to an event is returned as is.
impl<'a> AsRef<Event<'a>> for Event<'a> {
    fn as_ref(&self) -> &Event<'a> {
        self
    }
}
1607
1608////////////////////////////////////////////////////////////////////////////////////////////////////
1609
/// Converts a string `Cow` into a `Cow` of its UTF-8 bytes. Borrowed data
/// stays borrowed and owned data stays owned.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1617
/// Applies the `trim` function to the bytes inside `value`.
///
/// Borrowed data is trimmed without allocation. Owned data is returned
/// unchanged when `trim` keeps the whole length, otherwise the trimmed part
/// is copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let buffer = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(buffer) => buffer,
    };

    // `None` means that nothing was trimmed and the buffer can be reused
    let shortened = {
        let kept = trim(&buffer);
        if kept.len() == buffer.len() {
            None
        } else {
            Some(kept.to_vec())
        }
    };
    Cow::Owned(shortened.unwrap_or(buffer))
}
1633
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of the name only.
    #[test]
    fn bytestart_create() {
        let tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }

    /// Changing the name keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
        assert_eq!(tag.attributes_raw(), b"");

        // `test` (4 bytes) + ` x="a"` (6 bytes)
        tag.push_attribute(("x", "a"));
        assert_eq!(tag.len(), 10);
        assert_eq!(tag.attributes_raw(), b" x=\"a\"");

        // `g` (1 byte) + ` x="a"` (6 bytes)
        tag.set_name(b"g");
        assert_eq!(tag.len(), 7);
        assert_eq!(tag.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the name in the tag.
    #[test]
    fn bytestart_clear_attributes() {
        let mut tag = BytesStart::new("test");
        tag.push_attribute(("x", "y\"z"));
        tag.push_attribute(("x", "y\"z"));
        tag.clear_attributes();
        assert!(tag.attributes().next().is_none());
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }
}