quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{escape, minimal_escape, parse_number, partial_escape, EscapeError};
52use crate::name::{LocalName, QName};
53use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
54use attributes::{AttrError, Attribute, Attributes};
55
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<` and `>` or `/>`:
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
/// <element a1 = 'val1' a2=\"val2\" />\
/// <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Content of the element (the name followed by the raw attributes),
    /// before any UTF-8 conversion
    pub(crate) buf: Cow<'a, [u8]>,
    /// End of the element name; the name starts at the start of `buf`
    pub(crate) name_len: usize,
    /// Encoding used for `buf`
    decoder: Decoder,
}
97
98impl<'a> BytesStart<'a> {
99 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
100 #[inline]
101 pub(crate) const fn wrap(content: &'a [u8], name_len: usize, decoder: Decoder) -> Self {
102 BytesStart {
103 buf: Cow::Borrowed(content),
104 name_len,
105 decoder,
106 }
107 }
108
109 /// Creates a new `BytesStart` from the given name.
110 ///
111 /// # Warning
112 ///
113 /// `name` must be a valid name.
114 #[inline]
115 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
116 let buf = str_cow_to_bytes(name);
117 BytesStart {
118 name_len: buf.len(),
119 buf,
120 decoder: Decoder::utf8(),
121 }
122 }
123
124 /// Creates a new `BytesStart` from the given content (name + attributes).
125 ///
126 /// # Warning
127 ///
128 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
129 /// must be correctly-formed attributes. Neither are checked, it is possible
130 /// to generate invalid XML if `content` or `name_len` are incorrect.
131 #[inline]
132 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
133 BytesStart {
134 buf: str_cow_to_bytes(content),
135 name_len,
136 decoder: Decoder::utf8(),
137 }
138 }
139
140 /// Converts the event into an owned event.
141 pub fn into_owned(self) -> BytesStart<'static> {
142 BytesStart {
143 buf: Cow::Owned(self.buf.into_owned()),
144 name_len: self.name_len,
145 decoder: self.decoder,
146 }
147 }
148
149 /// Converts the event into an owned event without taking ownership of Event
150 pub fn to_owned(&self) -> BytesStart<'static> {
151 BytesStart {
152 buf: Cow::Owned(self.buf.clone().into_owned()),
153 name_len: self.name_len,
154 decoder: self.decoder,
155 }
156 }
157
158 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
159 ///
160 /// # Example
161 ///
162 /// ```
163 /// use quick_xml::events::{BytesStart, Event};
164 /// # use quick_xml::writer::Writer;
165 /// # use quick_xml::Error;
166 ///
167 /// struct SomeStruct<'a> {
168 /// attrs: BytesStart<'a>,
169 /// // ...
170 /// }
171 /// # impl<'a> SomeStruct<'a> {
172 /// # fn example(&self) -> Result<(), Error> {
173 /// # let mut writer = Writer::new(Vec::new());
174 ///
175 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
176 /// // ...
177 /// writer.write_event(Event::End(self.attrs.to_end()))?;
178 /// # Ok(())
179 /// # }}
180 /// ```
181 ///
182 /// [`to_end`]: Self::to_end
183 pub fn borrow(&self) -> BytesStart {
184 BytesStart {
185 buf: Cow::Borrowed(&self.buf),
186 name_len: self.name_len,
187 decoder: self.decoder,
188 }
189 }
190
191 /// Creates new paired close tag
192 #[inline]
193 pub fn to_end(&self) -> BytesEnd {
194 BytesEnd::from(self.name())
195 }
196
197 /// Get the decoder, used to decode bytes, read by the reader which produces
198 /// this event, to the strings.
199 ///
200 /// When event was created manually, encoding is UTF-8.
201 ///
202 /// If [`encoding`] feature is enabled and no encoding is specified in declaration,
203 /// defaults to UTF-8.
204 ///
205 /// [`encoding`]: ../index.html#encoding
206 #[inline]
207 pub const fn decoder(&self) -> Decoder {
208 self.decoder
209 }
210
211 /// Gets the undecoded raw tag name, as present in the input stream.
212 #[inline]
213 pub fn name(&self) -> QName {
214 QName(&self.buf[..self.name_len])
215 }
216
217 /// Gets the undecoded raw local tag name (excluding namespace) as present
218 /// in the input stream.
219 ///
220 /// All content up to and including the first `:` character is removed from the tag name.
221 #[inline]
222 pub fn local_name(&self) -> LocalName {
223 self.name().into()
224 }
225
226 /// Edit the name of the BytesStart in-place
227 ///
228 /// # Warning
229 ///
230 /// `name` must be a valid name.
231 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
232 let bytes = self.buf.to_mut();
233 bytes.splice(..self.name_len, name.iter().cloned());
234 self.name_len = name.len();
235 self
236 }
237}
238
239/// Attribute-related methods
240impl<'a> BytesStart<'a> {
241 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
242 ///
243 /// The yielded items must be convertible to [`Attribute`] using `Into`.
244 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
245 where
246 I: IntoIterator,
247 I::Item: Into<Attribute<'b>>,
248 {
249 self.extend_attributes(attributes);
250 self
251 }
252
253 /// Add additional attributes to this tag using an iterator.
254 ///
255 /// The yielded items must be convertible to [`Attribute`] using `Into`.
256 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
257 where
258 I: IntoIterator,
259 I::Item: Into<Attribute<'b>>,
260 {
261 for attr in attributes {
262 self.push_attribute(attr);
263 }
264 self
265 }
266
267 /// Adds an attribute to this element.
268 pub fn push_attribute<'b, A>(&mut self, attr: A)
269 where
270 A: Into<Attribute<'b>>,
271 {
272 self.buf.to_mut().push(b' ');
273 self.push_attr(attr.into());
274 }
275
276 /// Remove all attributes from the ByteStart
277 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
278 self.buf.to_mut().truncate(self.name_len);
279 self
280 }
281
282 /// Returns an iterator over the attributes of this tag.
283 pub fn attributes(&self) -> Attributes {
284 Attributes::wrap(&self.buf, self.name_len, false, self.decoder)
285 }
286
287 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
288 pub fn html_attributes(&self) -> Attributes {
289 Attributes::wrap(&self.buf, self.name_len, true, self.decoder)
290 }
291
292 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
293 /// including the whitespace after the tag name if there is any.
294 #[inline]
295 pub fn attributes_raw(&self) -> &[u8] {
296 &self.buf[self.name_len..]
297 }
298
299 /// Try to get an attribute
300 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
301 &'a self,
302 attr_name: N,
303 ) -> Result<Option<Attribute<'a>>, AttrError> {
304 for a in self.attributes().with_checks(false) {
305 let a = a?;
306 if a.key.as_ref() == attr_name.as_ref() {
307 return Ok(Some(a));
308 }
309 }
310 Ok(None)
311 }
312
313 /// Adds an attribute to this element.
314 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
315 let bytes = self.buf.to_mut();
316 bytes.extend_from_slice(attr.key.as_ref());
317 bytes.extend_from_slice(b"=\"");
318 // FIXME: need to escape attribute content
319 bytes.extend_from_slice(attr.value.as_ref());
320 bytes.push(b'"');
321 }
322
323 /// Adds new line in existing element
324 pub(crate) fn push_newline(&mut self) {
325 self.buf.to_mut().push(b'\n');
326 }
327
328 /// Adds indentation bytes in existing element
329 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
330 self.buf.to_mut().extend_from_slice(indent);
331 }
332}
333
334impl<'a> Debug for BytesStart<'a> {
335 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
336 write!(f, "BytesStart {{ buf: ")?;
337 write_cow_string(f, &self.buf)?;
338 write!(f, ", name_len: {} }}", self.name_len)
339 }
340}
341
342impl<'a> Deref for BytesStart<'a> {
343 type Target = [u8];
344
345 fn deref(&self) -> &[u8] {
346 &self.buf
347 }
348}
349
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Builds a start tag from an arbitrary non-empty alphanumeric name and
    /// an arbitrary list of `(key, value)` attribute pairs.
    ///
    /// Inputs whose name is empty or contains a non-alphanumeric character
    /// are rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `Vec` is already `IntoIterator`; no explicit `.into_iter()` needed
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
366
367////////////////////////////////////////////////////////////////////////////////////////////////////
368
/// Closing tag data (`Event::End`): `</name>`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded bytes of the closing tag; the whole buffer is treated
    /// as the name
    name: Cow<'a, [u8]>,
}
405
406impl<'a> BytesEnd<'a> {
407 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
408 #[inline]
409 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
410 BytesEnd { name }
411 }
412
413 /// Creates a new `BytesEnd` borrowing a slice.
414 ///
415 /// # Warning
416 ///
417 /// `name` must be a valid name.
418 #[inline]
419 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
420 Self::wrap(str_cow_to_bytes(name))
421 }
422
423 /// Converts the event into an owned event.
424 pub fn into_owned(self) -> BytesEnd<'static> {
425 BytesEnd {
426 name: Cow::Owned(self.name.into_owned()),
427 }
428 }
429
430 /// Converts the event into a borrowed event.
431 #[inline]
432 pub fn borrow(&self) -> BytesEnd {
433 BytesEnd {
434 name: Cow::Borrowed(&self.name),
435 }
436 }
437
438 /// Gets the undecoded raw tag name, as present in the input stream.
439 #[inline]
440 pub fn name(&self) -> QName {
441 QName(&self.name)
442 }
443
444 /// Gets the undecoded raw local tag name (excluding namespace) as present
445 /// in the input stream.
446 ///
447 /// All content up to and including the first `:` character is removed from the tag name.
448 #[inline]
449 pub fn local_name(&self) -> LocalName {
450 self.name().into()
451 }
452}
453
454impl<'a> Debug for BytesEnd<'a> {
455 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
456 write!(f, "BytesEnd {{ name: ")?;
457 write_cow_string(f, &self.name)?;
458 write!(f, " }}")
459 }
460}
461
462impl<'a> Deref for BytesEnd<'a> {
463 type Target = [u8];
464
465 fn deref(&self) -> &[u8] {
466 &self.name
467 }
468}
469
470impl<'a> From<QName<'a>> for BytesEnd<'a> {
471 #[inline]
472 fn from(name: QName<'a>) -> Self {
473 Self::wrap(name.into_inner().into())
474 }
475}
476
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Builds a closing tag from an arbitrary string; the string is not
    /// validated as a name.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
486
487////////////////////////////////////////////////////////////////////////////////////////////////////
488
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally data is stored in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of comment this is everything
/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
/// (i.e. in case of DTD the first character is never space):
///
/// ```
/// # use quick_xml::events::{BytesText, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
/// <!DOCTYPE comment or text >\
/// comment or text \
/// <!--comment or text -->"
/// );
/// let content = "comment or text ";
/// let event = BytesText::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
/// // deref coercion of &BytesText to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. Content is encoded in the XML
    /// document encoding when the event comes from the reader and should be in the
    /// document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
529
530impl<'a> BytesText<'a> {
531 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
532 #[inline]
533 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
534 Self {
535 content: content.into(),
536 decoder,
537 }
538 }
539
540 /// Creates a new `BytesText` from an escaped string.
541 #[inline]
542 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
543 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
544 }
545
546 /// Creates a new `BytesText` from a string. The string is expected not to
547 /// be escaped.
548 #[inline]
549 pub fn new(content: &'a str) -> Self {
550 Self::from_escaped(escape(content))
551 }
552
553 /// Ensures that all data is owned to extend the object's lifetime if
554 /// necessary.
555 #[inline]
556 pub fn into_owned(self) -> BytesText<'static> {
557 BytesText {
558 content: self.content.into_owned().into(),
559 decoder: self.decoder,
560 }
561 }
562
563 /// Extracts the inner `Cow` from the `BytesText` event container.
564 #[inline]
565 pub fn into_inner(self) -> Cow<'a, [u8]> {
566 self.content
567 }
568
569 /// Converts the event into a borrowed event.
570 #[inline]
571 pub fn borrow(&self) -> BytesText {
572 BytesText {
573 content: Cow::Borrowed(&self.content),
574 decoder: self.decoder,
575 }
576 }
577
578 /// Decodes the content of the event.
579 ///
580 /// This will allocate if the value contains any escape sequences or in
581 /// non-UTF-8 encoding.
582 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
583 self.decoder.decode_cow(&self.content)
584 }
585
586 /// Removes leading XML whitespace bytes from text content.
587 ///
588 /// Returns `true` if content is empty after that
589 pub fn inplace_trim_start(&mut self) -> bool {
590 self.content = trim_cow(
591 replace(&mut self.content, Cow::Borrowed(b"")),
592 trim_xml_start,
593 );
594 self.content.is_empty()
595 }
596
597 /// Removes trailing XML whitespace bytes from text content.
598 ///
599 /// Returns `true` if content is empty after that
600 pub fn inplace_trim_end(&mut self) -> bool {
601 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
602 self.content.is_empty()
603 }
604}
605
606impl<'a> Debug for BytesText<'a> {
607 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
608 write!(f, "BytesText {{ content: ")?;
609 write_cow_string(f, &self.content)?;
610 write!(f, " }}")
611 }
612}
613
614impl<'a> Deref for BytesText<'a> {
615 type Target = [u8];
616
617 fn deref(&self) -> &[u8] {
618 &self.content
619 }
620}
621
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Builds a text event from an arbitrary string consisting only of
    /// alphanumeric characters (the empty string is accepted).
    ///
    /// Any other input is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
636
637////////////////////////////////////////////////////////////////////////////////////////////////////
638
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
/// [convert](Self::escape) it to [`BytesText`].
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<![CDATA[` and `]]>`.
///
/// Note, that inner text will not contain `]]>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Unescaped content of the CDATA section, i.e. the bytes between
    /// `<![CDATA[` and `]]>`
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
667
668impl<'a> BytesCData<'a> {
669 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
670 #[inline]
671 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
672 Self {
673 content: content.into(),
674 decoder,
675 }
676 }
677
678 /// Creates a new `BytesCData` from a string.
679 ///
680 /// # Warning
681 ///
682 /// `content` must not contain the `]]>` sequence. You can use
683 /// [`BytesCData::escaped`] to escape the content instead.
684 #[inline]
685 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
686 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
687 }
688
689 /// Creates an iterator of `BytesCData` from a string.
690 ///
691 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
692 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
693 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
694 /// for each of those sections.
695 ///
696 /// # Examples
697 ///
698 /// ```
699 /// # use quick_xml::events::BytesCData;
700 /// # use pretty_assertions::assert_eq;
701 /// let content = "";
702 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
703 /// assert_eq!(cdata, &[BytesCData::new("")]);
704 ///
705 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
706 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
707 /// assert_eq!(cdata, &[
708 /// BytesCData::new("Certain tokens like ]]"),
709 /// BytesCData::new("> can be difficult and <invalid>"),
710 /// ]);
711 ///
712 /// let content = "foo]]>bar]]>baz]]>quux";
713 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
714 /// assert_eq!(cdata, &[
715 /// BytesCData::new("foo]]"),
716 /// BytesCData::new(">bar]]"),
717 /// BytesCData::new(">baz]]"),
718 /// BytesCData::new(">quux"),
719 /// ]);
720 /// ```
721 #[inline]
722 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
723 CDataIterator {
724 unprocessed: content.as_bytes(),
725 finished: false,
726 }
727 }
728
729 /// Ensures that all data is owned to extend the object's lifetime if
730 /// necessary.
731 #[inline]
732 pub fn into_owned(self) -> BytesCData<'static> {
733 BytesCData {
734 content: self.content.into_owned().into(),
735 decoder: self.decoder,
736 }
737 }
738
739 /// Extracts the inner `Cow` from the `BytesCData` event container.
740 #[inline]
741 pub fn into_inner(self) -> Cow<'a, [u8]> {
742 self.content
743 }
744
745 /// Converts the event into a borrowed event.
746 #[inline]
747 pub fn borrow(&self) -> BytesCData {
748 BytesCData {
749 content: Cow::Borrowed(&self.content),
750 decoder: self.decoder,
751 }
752 }
753
754 /// Converts this CDATA content to an escaped version, that can be written
755 /// as an usual text in XML.
756 ///
757 /// This function performs following replacements:
758 ///
759 /// | Character | Replacement
760 /// |-----------|------------
761 /// | `<` | `<`
762 /// | `>` | `>`
763 /// | `&` | `&`
764 /// | `'` | `'`
765 /// | `"` | `"`
766 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
767 let decoded = self.decode()?;
768 Ok(BytesText::wrap(
769 match escape(decoded) {
770 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
771 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
772 },
773 Decoder::utf8(),
774 ))
775 }
776
777 /// Converts this CDATA content to an escaped version, that can be written
778 /// as an usual text in XML.
779 ///
780 /// In XML text content, it is allowed (though not recommended) to leave
781 /// the quote special characters `"` and `'` unescaped.
782 ///
783 /// This function performs following replacements:
784 ///
785 /// | Character | Replacement
786 /// |-----------|------------
787 /// | `<` | `<`
788 /// | `>` | `>`
789 /// | `&` | `&`
790 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
791 let decoded = self.decode()?;
792 Ok(BytesText::wrap(
793 match partial_escape(decoded) {
794 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
795 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
796 },
797 Decoder::utf8(),
798 ))
799 }
800
801 /// Converts this CDATA content to an escaped version, that can be written
802 /// as an usual text in XML. This method escapes only those characters that
803 /// must be escaped according to the [specification].
804 ///
805 /// This function performs following replacements:
806 ///
807 /// | Character | Replacement
808 /// |-----------|------------
809 /// | `<` | `<`
810 /// | `&` | `&`
811 ///
812 /// [specification]: https://www.w3.org/TR/xml11/#syntax
813 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
814 let decoded = self.decode()?;
815 Ok(BytesText::wrap(
816 match minimal_escape(decoded) {
817 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
818 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
819 },
820 Decoder::utf8(),
821 ))
822 }
823
824 /// Decodes the raw input byte content of the CDATA section into a string,
825 /// without performing XML entity escaping.
826 ///
827 /// When this event produced by the XML reader, it uses the encoding information
828 /// associated with that reader to interpret the raw bytes contained within this
829 /// CDATA event.
830 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
831 Ok(self.decoder.decode_cow(&self.content)?)
832 }
833}
834
835impl<'a> Debug for BytesCData<'a> {
836 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
837 write!(f, "BytesCData {{ content: ")?;
838 write_cow_string(f, &self.content)?;
839 write!(f, " }}")
840 }
841}
842
843impl<'a> Deref for BytesCData<'a> {
844 type Target = [u8];
845
846 fn deref(&self) -> &[u8] {
847 &self.content
848 }
849}
850
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Builds a CDATA event from an arbitrary string; the string is not
    /// checked for the `]]>` sequence.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
860
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set when the last section has been emitted; once `true`, `next()`
    /// returns `None`.
    finished: bool,
}
871
872impl<'a> Debug for CDataIterator<'a> {
873 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
874 f.debug_struct("CDataIterator")
875 .field("unprocessed", &Bytes(self.unprocessed))
876 .field("finished", &self.finished)
877 .finish()
878 }
879}
880
881impl<'a> Iterator for CDataIterator<'a> {
882 type Item = BytesCData<'a>;
883
884 fn next(&mut self) -> Option<BytesCData<'a>> {
885 if self.finished {
886 return None;
887 }
888
889 for gt in memchr::memchr_iter(b'>', self.unprocessed) {
890 if self.unprocessed[..gt].ends_with(b"]]") {
891 let (slice, rest) = self.unprocessed.split_at(gt);
892 self.unprocessed = rest;
893 return Some(BytesCData::wrap(slice, Decoder::utf8()));
894 }
895 }
896
897 self.finished = true;
898 Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
899 }
900}
901
// Once `finished` is set, `next()` returns `None` on every later call,
// which is the contract `FusedIterator` requires.
impl FusedIterator for CDataIterator<'_> {}
903
904////////////////////////////////////////////////////////////////////////////////////////////////////
905
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note, that inner text will not contain `?>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// Whole instruction stored in a `BytesStart`: the target takes the place
    /// of the tag name (its length is stored as `name_len`) and the rest of
    /// the buffer is the instruction content
    content: BytesStart<'a>,
}
933
934impl<'a> BytesPI<'a> {
935 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
936 #[inline]
937 pub(crate) const fn wrap(content: &'a [u8], target_len: usize, decoder: Decoder) -> Self {
938 Self {
939 content: BytesStart::wrap(content, target_len, decoder),
940 }
941 }
942
943 /// Creates a new `BytesPI` from a string.
944 ///
945 /// # Warning
946 ///
947 /// `content` must not contain the `?>` sequence.
948 #[inline]
949 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
950 let buf = str_cow_to_bytes(content);
951 let name_len = name_len(&buf);
952 Self {
953 content: BytesStart {
954 buf,
955 name_len,
956 decoder: Decoder::utf8(),
957 },
958 }
959 }
960
961 /// Ensures that all data is owned to extend the object's lifetime if
962 /// necessary.
963 #[inline]
964 pub fn into_owned(self) -> BytesPI<'static> {
965 BytesPI {
966 content: self.content.into_owned().into(),
967 }
968 }
969
970 /// Extracts the inner `Cow` from the `BytesPI` event container.
971 #[inline]
972 pub fn into_inner(self) -> Cow<'a, [u8]> {
973 self.content.buf
974 }
975
976 /// Converts the event into a borrowed event.
977 #[inline]
978 pub fn borrow(&self) -> BytesPI {
979 BytesPI {
980 content: self.content.borrow(),
981 }
982 }
983
984 /// A target used to identify the application to which the instruction is directed.
985 ///
986 /// # Example
987 ///
988 /// ```
989 /// # use pretty_assertions::assert_eq;
990 /// use quick_xml::events::BytesPI;
991 ///
992 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
993 /// assert_eq!(instruction.target(), b"xml-stylesheet");
994 /// ```
995 #[inline]
996 pub fn target(&self) -> &[u8] {
997 self.content.name().0
998 }
999
1000 /// Content of the processing instruction. Contains everything between target
1001 /// name and the end of the instruction. A direct consequence is that the first
1002 /// character is always a space character.
1003 ///
1004 /// # Example
1005 ///
1006 /// ```
1007 /// # use pretty_assertions::assert_eq;
1008 /// use quick_xml::events::BytesPI;
1009 ///
1010 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1011 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1012 /// ```
1013 #[inline]
1014 pub fn content(&self) -> &[u8] {
1015 self.content.attributes_raw()
1016 }
1017
1018 /// A view of the processing instructions' content as a list of key-value pairs.
1019 ///
1020 /// Key-value pairs are used in some processing instructions, for example in
1021 /// `<?xml-stylesheet?>`.
1022 ///
1023 /// Returned iterator does not validate attribute values as may required by
1024 /// target's rules. For example, it doesn't check that substring `?>` is not
1025 /// present in the attribute value. That shouldn't be the problem when event
1026 /// is produced by the reader, because reader detects end of processing instruction
1027 /// by the first `?>` sequence, as required by the specification, and therefore
1028 /// this sequence cannot appear inside it.
1029 ///
1030 /// # Example
1031 ///
1032 /// ```
1033 /// # use pretty_assertions::assert_eq;
1034 /// use std::borrow::Cow;
1035 /// use quick_xml::events::attributes::Attribute;
1036 /// use quick_xml::events::BytesPI;
1037 /// use quick_xml::name::QName;
1038 ///
1039 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1040 /// for attr in instruction.attributes() {
1041 /// assert_eq!(attr, Ok(Attribute {
1042 /// key: QName(b"href"),
1043 /// value: Cow::Borrowed(b"style.css"),
1044 /// }));
1045 /// }
1046 /// ```
1047 #[inline]
1048 pub fn attributes(&self) -> Attributes {
1049 self.content.attributes()
1050 }
1051}
1052
1053impl<'a> Debug for BytesPI<'a> {
1054 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1055 write!(f, "BytesPI {{ content: ")?;
1056 write_cow_string(f, &self.content.buf)?;
1057 write!(f, " }}")
1058 }
1059}
1060
1061impl<'a> Deref for BytesPI<'a> {
1062 type Target = [u8];
1063
1064 fn deref(&self) -> &[u8] {
1065 &self.content
1066 }
1067}
1068
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Generates a processing instruction from an arbitrary string slice
    /// by passing it to [`BytesPI::new`].
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str as arbitrary::Arbitrary>::arbitrary(u)?))
    }

    /// The only consumed value is a single `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1078
1079////////////////////////////////////////////////////////////////////////////////////////////////////
1080
1081/// An XML declaration (`Event::Decl`).
1082///
1083/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1084///
1085/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1086/// returns the content of this event between `<?` and `?>`.
1087///
1088/// Note, that inner text will not contain `?>` sequence inside:
1089///
1090/// ```
1091/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1092/// # use quick_xml::reader::Reader;
1093/// # use pretty_assertions::assert_eq;
1094/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1095/// let content = "xml version = '1.0' ";
1096/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1097///
1098/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1099/// // deref coercion of &BytesDecl to &[u8]
1100/// assert_eq!(&event as &[u8], content.as_bytes());
1101/// // AsRef<[u8]> for &T + deref coercion
1102/// assert_eq!(event.as_ref(), content.as_bytes());
1103/// ```
1104#[derive(Clone, Debug, Eq, PartialEq)]
1105pub struct BytesDecl<'a> {
1106 content: BytesStart<'a>,
1107}
1108
1109impl<'a> BytesDecl<'a> {
1110 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1111 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1112 /// attribute.
1113 ///
1114 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1115 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1116 /// the double quote character is not allowed in any of the attribute values.
1117 pub fn new(
1118 version: &str,
1119 encoding: Option<&str>,
1120 standalone: Option<&str>,
1121 ) -> BytesDecl<'static> {
1122 // Compute length of the buffer based on supplied attributes
1123 // ' encoding=""' => 12
1124 let encoding_attr_len = if let Some(xs) = encoding {
1125 12 + xs.len()
1126 } else {
1127 0
1128 };
1129 // ' standalone=""' => 14
1130 let standalone_attr_len = if let Some(xs) = standalone {
1131 14 + xs.len()
1132 } else {
1133 0
1134 };
1135 // 'xml version=""' => 14
1136 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1137
1138 buf.push_str("xml version=\"");
1139 buf.push_str(version);
1140
1141 if let Some(encoding_val) = encoding {
1142 buf.push_str("\" encoding=\"");
1143 buf.push_str(encoding_val);
1144 }
1145
1146 if let Some(standalone_val) = standalone {
1147 buf.push_str("\" standalone=\"");
1148 buf.push_str(standalone_val);
1149 }
1150 buf.push('"');
1151
1152 BytesDecl {
1153 content: BytesStart::from_content(buf, 3),
1154 }
1155 }
1156
1157 /// Creates a `BytesDecl` from a `BytesStart`
1158 pub const fn from_start(start: BytesStart<'a>) -> Self {
1159 Self { content: start }
1160 }
1161
1162 /// Gets xml version, excluding quotes (`'` or `"`).
1163 ///
1164 /// According to the [grammar], the version *must* be the first thing in the declaration.
1165 /// This method tries to extract the first thing in the declaration and return it.
1166 /// In case of multiple attributes value of the first one is returned.
1167 ///
1168 /// If version is missed in the declaration, or the first thing is not a version,
1169 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1170 ///
1171 /// # Examples
1172 ///
1173 /// ```
1174 /// use quick_xml::errors::{Error, IllFormedError};
1175 /// use quick_xml::events::{BytesDecl, BytesStart};
1176 ///
1177 /// // <?xml version='1.1'?>
1178 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1179 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1180 ///
1181 /// // <?xml version='1.0' version='1.1'?>
1182 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1183 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1184 ///
1185 /// // <?xml encoding='utf-8'?>
1186 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1187 /// match decl.version() {
1188 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1189 /// _ => assert!(false),
1190 /// }
1191 ///
1192 /// // <?xml encoding='utf-8' version='1.1'?>
1193 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1194 /// match decl.version() {
1195 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1196 /// _ => assert!(false),
1197 /// }
1198 ///
1199 /// // <?xml?>
1200 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1201 /// match decl.version() {
1202 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1203 /// _ => assert!(false),
1204 /// }
1205 /// ```
1206 ///
1207 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1208 pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1209 // The version *must* be the first thing in the declaration.
1210 match self.content.attributes().with_checks(false).next() {
1211 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1212 // first attribute was not "version"
1213 Some(Ok(a)) => {
1214 let found = from_utf8(a.key.as_ref())
1215 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1216 .to_string();
1217 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1218 found,
1219 ))))
1220 }
1221 // error parsing attributes
1222 Some(Err(e)) => Err(e.into()),
1223 // no attributes
1224 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1225 }
1226 }
1227
1228 /// Gets xml encoding, excluding quotes (`'` or `"`).
1229 ///
1230 /// Although according to the [grammar] encoding must appear before `"standalone"`
1231 /// and after `"version"`, this method does not check that. The first occurrence
1232 /// of the attribute will be returned even if there are several. Also, method does
1233 /// not restrict symbols that can forming the encoding, so the returned encoding
1234 /// name may not correspond to the grammar.
1235 ///
1236 /// # Examples
1237 ///
1238 /// ```
1239 /// use std::borrow::Cow;
1240 /// use quick_xml::Error;
1241 /// use quick_xml::events::{BytesDecl, BytesStart};
1242 ///
1243 /// // <?xml version='1.1'?>
1244 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1245 /// assert!(decl.encoding().is_none());
1246 ///
1247 /// // <?xml encoding='utf-8'?>
1248 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1249 /// match decl.encoding() {
1250 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1251 /// _ => assert!(false),
1252 /// }
1253 ///
1254 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1255 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1256 /// match decl.encoding() {
1257 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1258 /// _ => assert!(false),
1259 /// }
1260 /// ```
1261 ///
1262 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1263 pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1264 self.content
1265 .try_get_attribute("encoding")
1266 .map(|a| a.map(|a| a.value))
1267 .transpose()
1268 }
1269
1270 /// Gets xml standalone, excluding quotes (`'` or `"`).
1271 ///
1272 /// Although according to the [grammar] standalone flag must appear after `"version"`
1273 /// and `"encoding"`, this method does not check that. The first occurrence of the
1274 /// attribute will be returned even if there are several. Also, method does not
1275 /// restrict symbols that can forming the value, so the returned flag name may not
1276 /// correspond to the grammar.
1277 ///
1278 /// # Examples
1279 ///
1280 /// ```
1281 /// use std::borrow::Cow;
1282 /// use quick_xml::Error;
1283 /// use quick_xml::events::{BytesDecl, BytesStart};
1284 ///
1285 /// // <?xml version='1.1'?>
1286 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1287 /// assert!(decl.standalone().is_none());
1288 ///
1289 /// // <?xml standalone='yes'?>
1290 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1291 /// match decl.standalone() {
1292 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1293 /// _ => assert!(false),
1294 /// }
1295 ///
1296 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1297 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1298 /// match decl.standalone() {
1299 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1300 /// _ => assert!(false),
1301 /// }
1302 /// ```
1303 ///
1304 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1305 pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1306 self.content
1307 .try_get_attribute("standalone")
1308 .map(|a| a.map(|a| a.value))
1309 .transpose()
1310 }
1311
1312 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1313 /// algorithm.
1314 ///
1315 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1316 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1317 /// one, is returned.
1318 #[cfg(feature = "encoding")]
1319 pub fn encoder(&self) -> Option<&'static Encoding> {
1320 self.encoding()
1321 .and_then(|e| e.ok())
1322 .and_then(|e| Encoding::for_label(&e))
1323 }
1324
1325 /// Converts the event into an owned event.
1326 pub fn into_owned(self) -> BytesDecl<'static> {
1327 BytesDecl {
1328 content: self.content.into_owned(),
1329 }
1330 }
1331
1332 /// Converts the event into a borrowed event.
1333 #[inline]
1334 pub fn borrow(&self) -> BytesDecl {
1335 BytesDecl {
1336 content: self.content.borrow(),
1337 }
1338 }
1339}
1340
1341impl<'a> Deref for BytesDecl<'a> {
1342 type Target = [u8];
1343
1344 fn deref(&self) -> &[u8] {
1345 &self.content
1346 }
1347}
1348
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates a declaration from an arbitrary version string and two
    /// optional strings used as the encoding and the standalone flag.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str as arbitrary::Arbitrary>::arbitrary(u)?,
            <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?,
            <Option<&str> as arbitrary::Arbitrary>::arbitrary(u)?,
        ))
    }

    /// Combines the hints of all three values consumed by `arbitrary`:
    /// one `&str` and two `Option<&str>`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        arbitrary::size_hint::and_all(&[
            <&str as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
            <Option<&str> as arbitrary::Arbitrary>::size_hint(depth),
        ])
    }
}
1363
1364////////////////////////////////////////////////////////////////////////////////////////////////////
1365
1366/// Character or general entity reference (`Event::GeneralRef`): `&ref;` or `&#<number>;`.
1367///
1368/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1369/// returns the content of this event between `&` and `;`:
1370///
1371/// ```
1372/// # use quick_xml::events::{BytesRef, Event};
1373/// # use quick_xml::reader::Reader;
1374/// # use pretty_assertions::assert_eq;
1375/// let mut reader = Reader::from_str(r#"&entity;"#);
1376/// let content = "entity";
1377/// let event = BytesRef::new(content);
1378///
1379/// assert_eq!(reader.read_event().unwrap(), Event::GeneralRef(event.borrow()));
1380/// // deref coercion of &BytesRef to &[u8]
1381/// assert_eq!(&event as &[u8], content.as_bytes());
1382/// // AsRef<[u8]> for &T + deref coercion
1383/// assert_eq!(event.as_ref(), content.as_bytes());
1384/// ```
1385#[derive(Clone, Eq, PartialEq)]
1386pub struct BytesRef<'a> {
1387 content: Cow<'a, [u8]>,
1388 /// Encoding in which the `content` is stored inside the event.
1389 decoder: Decoder,
1390}
1391
1392impl<'a> BytesRef<'a> {
1393 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
1394 #[inline]
1395 pub(crate) const fn wrap(content: &'a [u8], decoder: Decoder) -> Self {
1396 Self {
1397 content: Cow::Borrowed(content),
1398 decoder,
1399 }
1400 }
1401
1402 /// Creates a new `BytesRef` borrowing a slice.
1403 ///
1404 /// # Warning
1405 ///
1406 /// `name` must be a valid name.
1407 #[inline]
1408 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
1409 Self {
1410 content: str_cow_to_bytes(name),
1411 decoder: Decoder::utf8(),
1412 }
1413 }
1414
1415 /// Converts the event into an owned event.
1416 pub fn into_owned(self) -> BytesRef<'static> {
1417 BytesRef {
1418 content: Cow::Owned(self.content.into_owned()),
1419 decoder: self.decoder,
1420 }
1421 }
1422
1423 /// Extracts the inner `Cow` from the `BytesRef` event container.
1424 #[inline]
1425 pub fn into_inner(self) -> Cow<'a, [u8]> {
1426 self.content
1427 }
1428
1429 /// Converts the event into a borrowed event.
1430 #[inline]
1431 pub fn borrow(&self) -> BytesRef {
1432 BytesRef {
1433 content: Cow::Borrowed(&self.content),
1434 decoder: self.decoder,
1435 }
1436 }
1437
1438 /// Decodes the content of the event.
1439 ///
1440 /// This will allocate if the value contains any escape sequences or in
1441 /// non-UTF-8 encoding.
1442 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
1443 self.decoder.decode_cow(&self.content)
1444 }
1445
1446 /// Returns `true` if the specified reference represents the character reference
1447 /// (`&#<number>;`).
1448 ///
1449 /// ```
1450 /// # use quick_xml::events::BytesRef;
1451 /// # use pretty_assertions::assert_eq;
1452 /// assert_eq!(BytesRef::new("#x30").is_char_ref(), true);
1453 /// assert_eq!(BytesRef::new("#49" ).is_char_ref(), true);
1454 /// assert_eq!(BytesRef::new("lt" ).is_char_ref(), false);
1455 /// ```
1456 pub fn is_char_ref(&self) -> bool {
1457 matches!(self.content.first(), Some(b'#'))
1458 }
1459
1460 /// If this reference represents character reference, then resolves it and
1461 /// returns the character, otherwise returns `None`.
1462 ///
1463 /// This method does not check if character is allowed for XML, in other words,
1464 /// well-formedness constraint [WFC: Legal Char] is not enforced.
1465 /// The character `0x0`, however, will return `EscapeError::InvalidCharRef`.
1466 ///
1467 /// ```
1468 /// # use quick_xml::events::BytesRef;
1469 /// # use pretty_assertions::assert_eq;
1470 /// assert_eq!(BytesRef::new("#x30").resolve_char_ref().unwrap(), Some('0'));
1471 /// assert_eq!(BytesRef::new("#49" ).resolve_char_ref().unwrap(), Some('1'));
1472 /// assert_eq!(BytesRef::new("lt" ).resolve_char_ref().unwrap(), None);
1473 /// ```
1474 ///
1475 /// [WFC: Legal Char]: https://www.w3.org/TR/xml11/#wf-Legalchar
1476 pub fn resolve_char_ref(&self) -> Result<Option<char>, Error> {
1477 if let Some(num) = self.decode()?.strip_prefix('#') {
1478 let ch = parse_number(num).map_err(EscapeError::InvalidCharRef)?;
1479 return Ok(Some(ch));
1480 }
1481 Ok(None)
1482 }
1483}
1484
1485impl<'a> Debug for BytesRef<'a> {
1486 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1487 write!(f, "BytesRef {{ content: ")?;
1488 write_cow_string(f, &self.content)?;
1489 write!(f, " }}")
1490 }
1491}
1492
1493impl<'a> Deref for BytesRef<'a> {
1494 type Target = [u8];
1495
1496 fn deref(&self) -> &[u8] {
1497 &self.content
1498 }
1499}
1500
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesRef<'a> {
    /// Generates a reference from an arbitrary string slice by passing it
    /// to [`BytesRef::new`].
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str as arbitrary::Arbitrary>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// The only consumed value is a single `&str`, so its hint is reused.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1511
1512////////////////////////////////////////////////////////////////////////////////////////////////////
1513
1514/// Event emitted by [`Reader::read_event_into`].
1515///
1516/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
1517#[derive(Clone, Debug, Eq, PartialEq)]
1518#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1519pub enum Event<'a> {
1520 /// Start tag (with attributes) `<tag attr="value">`.
1521 Start(BytesStart<'a>),
1522 /// End tag `</tag>`.
1523 End(BytesEnd<'a>),
1524 /// Empty element tag (with attributes) `<tag attr="value" />`.
1525 Empty(BytesStart<'a>),
1526 /// Escaped character data between tags.
1527 Text(BytesText<'a>),
1528 /// Unescaped character data stored in `<![CDATA[...]]>`.
1529 CData(BytesCData<'a>),
1530 /// Comment `<!-- ... -->`.
1531 Comment(BytesText<'a>),
1532 /// XML declaration `<?xml ...?>`.
1533 Decl(BytesDecl<'a>),
1534 /// Processing instruction `<?...?>`.
1535 PI(BytesPI<'a>),
1536 /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
1537 DocType(BytesText<'a>),
1538 /// General reference `&entity;` in the textual data. Can be either an entity
1539 /// reference, or a character reference.
1540 GeneralRef(BytesRef<'a>),
1541 /// End of XML document.
1542 Eof,
1543}
1544
1545impl<'a> Event<'a> {
1546 /// Converts the event to an owned version, untied to the lifetime of
1547 /// buffer used when reading but incurring a new, separate allocation.
1548 pub fn into_owned(self) -> Event<'static> {
1549 match self {
1550 Event::Start(e) => Event::Start(e.into_owned()),
1551 Event::End(e) => Event::End(e.into_owned()),
1552 Event::Empty(e) => Event::Empty(e.into_owned()),
1553 Event::Text(e) => Event::Text(e.into_owned()),
1554 Event::Comment(e) => Event::Comment(e.into_owned()),
1555 Event::CData(e) => Event::CData(e.into_owned()),
1556 Event::Decl(e) => Event::Decl(e.into_owned()),
1557 Event::PI(e) => Event::PI(e.into_owned()),
1558 Event::DocType(e) => Event::DocType(e.into_owned()),
1559 Event::GeneralRef(e) => Event::GeneralRef(e.into_owned()),
1560 Event::Eof => Event::Eof,
1561 }
1562 }
1563
1564 /// Converts the event into a borrowed event.
1565 #[inline]
1566 pub fn borrow(&self) -> Event {
1567 match self {
1568 Event::Start(e) => Event::Start(e.borrow()),
1569 Event::End(e) => Event::End(e.borrow()),
1570 Event::Empty(e) => Event::Empty(e.borrow()),
1571 Event::Text(e) => Event::Text(e.borrow()),
1572 Event::Comment(e) => Event::Comment(e.borrow()),
1573 Event::CData(e) => Event::CData(e.borrow()),
1574 Event::Decl(e) => Event::Decl(e.borrow()),
1575 Event::PI(e) => Event::PI(e.borrow()),
1576 Event::DocType(e) => Event::DocType(e.borrow()),
1577 Event::GeneralRef(e) => Event::GeneralRef(e.borrow()),
1578 Event::Eof => Event::Eof,
1579 }
1580 }
1581}
1582
1583impl<'a> Deref for Event<'a> {
1584 type Target = [u8];
1585
1586 fn deref(&self) -> &[u8] {
1587 match *self {
1588 Event::Start(ref e) | Event::Empty(ref e) => e,
1589 Event::End(ref e) => e,
1590 Event::Text(ref e) => e,
1591 Event::Decl(ref e) => e,
1592 Event::PI(ref e) => e,
1593 Event::CData(ref e) => e,
1594 Event::Comment(ref e) => e,
1595 Event::DocType(ref e) => e,
1596 Event::GeneralRef(ref e) => e,
1597 Event::Eof => &[],
1598 }
1599 }
1600}
1601
1602impl<'a> AsRef<Event<'a>> for Event<'a> {
1603 fn as_ref(&self) -> &Event<'a> {
1604 self
1605 }
1606}
1607
1608////////////////////////////////////////////////////////////////////////////////////////////////////
1609
/// Reinterprets a (possibly owned) string as its bytes without copying:
/// a borrowed string stays borrowed, an owned string hands over its buffer.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::Owned(string.into_bytes()),
        Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()),
    }
}
1617
/// Applies `trim` to the bytes of `value`, keeping the kind of the `Cow`.
///
/// A borrowed value is trimmed by re-slicing. An owned value is replaced by
/// a copy of the trimmed part, unless `trim` left the length unchanged, in
/// which case the original buffer is returned as is.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let buffer = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(buffer) => buffer,
    };

    let kept = trim(&buffer);
    if kept.len() == buffer.len() {
        // Nothing was removed, reuse the existing allocation
        Cow::Owned(buffer)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1633
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let tag = BytesStart::new("test");

        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were already pushed.
    #[test]
    fn bytestart_set_name() {
        let mut tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
        assert_eq!(tag.attributes_raw(), b"");

        // 4 bytes of the name + 6 bytes of ` x="a"`
        tag.push_attribute(("x", "a"));
        assert_eq!(tag.len(), 10);
        assert_eq!(tag.attributes_raw(), b" x=\"a\"");

        // 1 byte of the new name + the same 6 bytes of the attribute
        tag.set_name(b"g");
        assert_eq!(tag.len(), 7);
        assert_eq!(tag.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the name of the tag.
    #[test]
    fn bytestart_clear_attributes() {
        let mut tag = BytesStart::new("test");
        for _ in 0..2 {
            tag.push_attribute(("x", "y\"z"));
        }

        tag.clear_attributes();

        assert!(tag.attributes().next().is_none());
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }
}