quick_xml/events/mod.rs
1//! Defines zero-copy XML events used throughout this library.
2//!
//! An XML event often represents part of an XML element.
4//! They occur both during reading and writing and are
5//! usually used with the stream-oriented API.
6//!
7//! For example, the XML element
8//! ```xml
9//! <name attr="value">Inner text</name>
10//! ```
11//! consists of the three events `Start`, `Text` and `End`.
12//! They can also represent other parts in an XML document like the
13//! XML declaration. Each Event usually contains further information,
14//! like the tag name, the attribute or the inner text.
15//!
16//! See [`Event`] for a list of all possible events.
17//!
18//! # Reading
//! When reading an XML stream, the events are emitted by [`Reader::read_event`]
20//! and [`Reader::read_event_into`]. You must listen
21//! for the different types of events you are interested in.
22//!
23//! See [`Reader`] for further information.
24//!
25//! # Writing
26//! When writing the XML document, you must create the XML element
27//! by constructing the events it consists of and pass them to the writer
28//! sequentially.
29//!
30//! See [`Writer`] for further information.
31//!
32//! [`Reader::read_event`]: crate::reader::Reader::read_event
33//! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
34//! [`Reader`]: crate::reader::Reader
35//! [`Writer`]: crate::writer::Writer
36//! [`Event`]: crate::events::Event
37
38pub mod attributes;
39
40#[cfg(feature = "encoding")]
41use encoding_rs::Encoding;
42use std::borrow::Cow;
43use std::fmt::{self, Debug, Formatter};
44use std::iter::FusedIterator;
45use std::mem::replace;
46use std::ops::Deref;
47use std::str::from_utf8;
48
49use crate::encoding::{Decoder, EncodingError};
50use crate::errors::{Error, IllFormedError};
51use crate::escape::{
52 escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
53};
54use crate::name::{LocalName, QName};
55#[cfg(feature = "serialize")]
56use crate::utils::CowRef;
57use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string, Bytes};
58use attributes::{AttrError, Attribute, Attributes};
59
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
/// An iterator over the attributes is returned by the [`attributes`] method.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<` and `>` or `/>`:
///
/// ```
/// # use quick_xml::events::{BytesStart, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <element a1 = 'val1' a2=\"val2\" />\
///     <element a1 = 'val1' a2=\"val2\" >"
/// );
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesStart::from_content(content, 7);
///
/// assert_eq!(reader.read_event().unwrap(), Event::Empty(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Start(event.borrow()));
/// // deref coercion of &BytesStart to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
/// [`attributes`]: Self::attributes
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
    /// Raw content of the element (name followed by the attributes), before
    /// any UTF-8 conversion.
    pub(crate) buf: Cow<'a, [u8]>,
    /// Length of the element name in bytes. The name starts at the start of
    /// `buf`, i.e. it occupies `buf[..name_len]`.
    pub(crate) name_len: usize,
}
99
100impl<'a> BytesStart<'a> {
101 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
102 #[inline]
103 pub(crate) const fn wrap(content: &'a [u8], name_len: usize) -> Self {
104 BytesStart {
105 buf: Cow::Borrowed(content),
106 name_len,
107 }
108 }
109
110 /// Creates a new `BytesStart` from the given name.
111 ///
112 /// # Warning
113 ///
114 /// `name` must be a valid name.
115 #[inline]
116 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
117 let buf = str_cow_to_bytes(name);
118 BytesStart {
119 name_len: buf.len(),
120 buf,
121 }
122 }
123
124 /// Creates a new `BytesStart` from the given content (name + attributes).
125 ///
126 /// # Warning
127 ///
128 /// `&content[..name_len]` must be a valid name, and the remainder of `content`
129 /// must be correctly-formed attributes. Neither are checked, it is possible
130 /// to generate invalid XML if `content` or `name_len` are incorrect.
131 #[inline]
132 pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
133 BytesStart {
134 buf: str_cow_to_bytes(content),
135 name_len,
136 }
137 }
138
139 /// Converts the event into an owned event.
140 pub fn into_owned(self) -> BytesStart<'static> {
141 BytesStart {
142 buf: Cow::Owned(self.buf.into_owned()),
143 name_len: self.name_len,
144 }
145 }
146
147 /// Converts the event into an owned event without taking ownership of Event
148 pub fn to_owned(&self) -> BytesStart<'static> {
149 BytesStart {
150 buf: Cow::Owned(self.buf.clone().into_owned()),
151 name_len: self.name_len,
152 }
153 }
154
155 /// Converts the event into a borrowed event. Most useful when paired with [`to_end`].
156 ///
157 /// # Example
158 ///
159 /// ```
160 /// use quick_xml::events::{BytesStart, Event};
161 /// # use quick_xml::writer::Writer;
162 /// # use quick_xml::Error;
163 ///
164 /// struct SomeStruct<'a> {
165 /// attrs: BytesStart<'a>,
166 /// // ...
167 /// }
168 /// # impl<'a> SomeStruct<'a> {
169 /// # fn example(&self) -> Result<(), Error> {
170 /// # let mut writer = Writer::new(Vec::new());
171 ///
172 /// writer.write_event(Event::Start(self.attrs.borrow()))?;
173 /// // ...
174 /// writer.write_event(Event::End(self.attrs.to_end()))?;
175 /// # Ok(())
176 /// # }}
177 /// ```
178 ///
179 /// [`to_end`]: Self::to_end
180 pub fn borrow(&self) -> BytesStart {
181 BytesStart {
182 buf: Cow::Borrowed(&self.buf),
183 name_len: self.name_len,
184 }
185 }
186
187 /// Creates new paired close tag
188 #[inline]
189 pub fn to_end(&self) -> BytesEnd {
190 BytesEnd::from(self.name())
191 }
192
193 /// Gets the undecoded raw tag name, as present in the input stream.
194 #[inline]
195 pub fn name(&self) -> QName {
196 QName(&self.buf[..self.name_len])
197 }
198
199 /// Gets the undecoded raw local tag name (excluding namespace) as present
200 /// in the input stream.
201 ///
202 /// All content up to and including the first `:` character is removed from the tag name.
203 #[inline]
204 pub fn local_name(&self) -> LocalName {
205 self.name().into()
206 }
207
208 /// Edit the name of the BytesStart in-place
209 ///
210 /// # Warning
211 ///
212 /// `name` must be a valid name.
213 pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
214 let bytes = self.buf.to_mut();
215 bytes.splice(..self.name_len, name.iter().cloned());
216 self.name_len = name.len();
217 self
218 }
219
220 /// Gets the undecoded raw tag name, as present in the input stream, which
221 /// is borrowed either to the input, or to the event.
222 ///
223 /// # Lifetimes
224 ///
225 /// - `'a`: Lifetime of the input data from which this event is borrow
226 /// - `'e`: Lifetime of the concrete event instance
227 // TODO: We should made this is a part of public API, but with safe wrapped for a name
228 #[cfg(feature = "serialize")]
229 pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
230 match self.buf {
231 Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
232 Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
233 }
234 }
235}
236
237/// Attribute-related methods
238impl<'a> BytesStart<'a> {
239 /// Consumes `self` and yield a new `BytesStart` with additional attributes from an iterator.
240 ///
241 /// The yielded items must be convertible to [`Attribute`] using `Into`.
242 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
243 where
244 I: IntoIterator,
245 I::Item: Into<Attribute<'b>>,
246 {
247 self.extend_attributes(attributes);
248 self
249 }
250
251 /// Add additional attributes to this tag using an iterator.
252 ///
253 /// The yielded items must be convertible to [`Attribute`] using `Into`.
254 pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
255 where
256 I: IntoIterator,
257 I::Item: Into<Attribute<'b>>,
258 {
259 for attr in attributes {
260 self.push_attribute(attr);
261 }
262 self
263 }
264
265 /// Adds an attribute to this element.
266 pub fn push_attribute<'b, A>(&mut self, attr: A)
267 where
268 A: Into<Attribute<'b>>,
269 {
270 self.buf.to_mut().push(b' ');
271 self.push_attr(attr.into());
272 }
273
274 /// Remove all attributes from the ByteStart
275 pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
276 self.buf.to_mut().truncate(self.name_len);
277 self
278 }
279
280 /// Returns an iterator over the attributes of this tag.
281 pub fn attributes(&self) -> Attributes {
282 Attributes::wrap(&self.buf, self.name_len, false)
283 }
284
285 /// Returns an iterator over the HTML-like attributes of this tag (no mandatory quotes or `=`).
286 pub fn html_attributes(&self) -> Attributes {
287 Attributes::wrap(&self.buf, self.name_len, true)
288 }
289
290 /// Gets the undecoded raw string with the attributes of this tag as a `&[u8]`,
291 /// including the whitespace after the tag name if there is any.
292 #[inline]
293 pub fn attributes_raw(&self) -> &[u8] {
294 &self.buf[self.name_len..]
295 }
296
297 /// Try to get an attribute
298 pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
299 &'a self,
300 attr_name: N,
301 ) -> Result<Option<Attribute<'a>>, AttrError> {
302 for a in self.attributes().with_checks(false) {
303 let a = a?;
304 if a.key.as_ref() == attr_name.as_ref() {
305 return Ok(Some(a));
306 }
307 }
308 Ok(None)
309 }
310
311 /// Adds an attribute to this element.
312 pub(crate) fn push_attr<'b>(&mut self, attr: Attribute<'b>) {
313 let bytes = self.buf.to_mut();
314 bytes.extend_from_slice(attr.key.as_ref());
315 bytes.extend_from_slice(b"=\"");
316 // FIXME: need to escape attribute content
317 bytes.extend_from_slice(attr.value.as_ref());
318 bytes.push(b'"');
319 }
320
321 /// Adds new line in existing element
322 pub(crate) fn push_newline(&mut self) {
323 self.buf.to_mut().push(b'\n');
324 }
325
326 /// Adds indentation bytes in existing element
327 pub(crate) fn push_indent(&mut self, indent: &[u8]) {
328 self.buf.to_mut().extend_from_slice(indent);
329 }
330}
331
332impl<'a> Debug for BytesStart<'a> {
333 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
334 write!(f, "BytesStart {{ buf: ")?;
335 write_cow_string(f, &self.buf)?;
336 write!(f, ", name_len: {} }}", self.name_len)
337 }
338}
339
340impl<'a> Deref for BytesStart<'a> {
341 type Target = [u8];
342
343 fn deref(&self) -> &[u8] {
344 &self.buf
345 }
346}
347
348impl<'a> From<QName<'a>> for BytesStart<'a> {
349 #[inline]
350 fn from(name: QName<'a>) -> Self {
351 let name = name.into_inner();
352 Self::wrap(name, name.len())
353 }
354}
355
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a start tag with a non-empty, purely alphanumeric name and an
    /// arbitrary list of `(key, value)` attributes.
    ///
    /// Returns `IncorrectFormat` when the generated name is empty or contains
    /// a non-alphanumeric character.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if s.is_empty() || !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut result = Self::new(s);
        // `extend_attributes` accepts any `IntoIterator`, so the `Vec` is passed directly
        result.extend_attributes(Vec::<(&str, &str)>::arbitrary(u)?);
        Ok(result)
    }

    /// Delegates to the size hint of `&str`, the type the name is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
372////////////////////////////////////////////////////////////////////////////////////////////////////
373
/// Closing tag data (`Event::End`): `</name>`.
///
/// The name can be accessed using the [`name`] or [`local_name`] methods.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `</` and `>`.
///
/// Note, that inner text will not contain `>` character inside:
///
/// ```
/// # use quick_xml::events::{BytesEnd, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str(r#"<element></element a1 = 'val1' a2="val2" >"#);
/// // Note, that this entire string considered as a .name()
/// let content = "element a1 = 'val1' a2=\"val2\" ";
/// let event = BytesEnd::new(content);
///
/// reader.config_mut().trim_markup_names_in_closing_tags = false;
/// reader.config_mut().check_end_names = false;
/// reader.read_event().unwrap(); // Skip `<element>`
///
/// assert_eq!(reader.read_event().unwrap(), Event::End(event.borrow()));
/// assert_eq!(event.name().as_ref(), content.as_bytes());
/// // deref coercion of &BytesEnd to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [`name`]: Self::name
/// [`local_name`]: Self::local_name
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
    /// Raw, undecoded content of the closing tag; the whole content is
    /// treated as the tag name.
    name: Cow<'a, [u8]>,
}
410
411impl<'a> BytesEnd<'a> {
412 /// Internal constructor, used by `Reader`. Supplies data in reader's encoding
413 #[inline]
414 pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self {
415 BytesEnd { name }
416 }
417
418 /// Creates a new `BytesEnd` borrowing a slice.
419 ///
420 /// # Warning
421 ///
422 /// `name` must be a valid name.
423 #[inline]
424 pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
425 Self::wrap(str_cow_to_bytes(name))
426 }
427
428 /// Converts the event into an owned event.
429 pub fn into_owned(self) -> BytesEnd<'static> {
430 BytesEnd {
431 name: Cow::Owned(self.name.into_owned()),
432 }
433 }
434
435 /// Converts the event into a borrowed event.
436 #[inline]
437 pub fn borrow(&self) -> BytesEnd {
438 BytesEnd {
439 name: Cow::Borrowed(&self.name),
440 }
441 }
442
443 /// Gets the undecoded raw tag name, as present in the input stream.
444 #[inline]
445 pub fn name(&self) -> QName {
446 QName(&self.name)
447 }
448
449 /// Gets the undecoded raw local tag name (excluding namespace) as present
450 /// in the input stream.
451 ///
452 /// All content up to and including the first `:` character is removed from the tag name.
453 #[inline]
454 pub fn local_name(&self) -> LocalName {
455 self.name().into()
456 }
457}
458
459impl<'a> Debug for BytesEnd<'a> {
460 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
461 write!(f, "BytesEnd {{ name: ")?;
462 write_cow_string(f, &self.name)?;
463 write!(f, " }}")
464 }
465}
466
467impl<'a> Deref for BytesEnd<'a> {
468 type Target = [u8];
469
470 fn deref(&self) -> &[u8] {
471 &self.name
472 }
473}
474
475impl<'a> From<QName<'a>> for BytesEnd<'a> {
476 #[inline]
477 fn from(name: QName<'a>) -> Self {
478 Self::wrap(name.into_inner().into())
479 }
480}
481
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates an end tag whose name is an arbitrary string (not validated).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`, the type the name is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
491
492////////////////////////////////////////////////////////////////////////////////////////////////////
493
/// Data from various events (most notably, `Event::Text`) that is stored in XML
/// in escaped form. Internally the data is stored in escaped form.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event. In case of comment this is everything
/// between `<!--` and `-->` and the text of comment will not contain `-->` inside.
/// In case of DTD this is everything between `<!DOCTYPE` + spaces and closing `>`
/// (i.e. in case of DTD the first character is never space):
///
/// ```
/// # use quick_xml::events::{BytesText, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// // Remember, that \ at the end of string literal strips
/// // all space characters to the first non-space character
/// let mut reader = Reader::from_str("\
///     <!DOCTYPE comment or text >\
///     comment or text \
///     <!--comment or text -->"
/// );
/// let content = "comment or text ";
/// let event = BytesText::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::DocType(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Text(event.borrow()));
/// assert_eq!(reader.read_event().unwrap(), Event::Comment(event.borrow()));
/// // deref coercion of &BytesText to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
    /// Escaped then encoded content of the event. The content is encoded in the
    /// XML document encoding when the event comes from the reader and should be
    /// in the document encoding when the event is passed to the writer
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
534
535impl<'a> BytesText<'a> {
536 /// Creates a new `BytesText` from an escaped byte sequence in the specified encoding.
537 #[inline]
538 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
539 Self {
540 content: content.into(),
541 decoder,
542 }
543 }
544
545 /// Creates a new `BytesText` from an escaped string.
546 #[inline]
547 pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
548 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
549 }
550
551 /// Creates a new `BytesText` from a string. The string is expected not to
552 /// be escaped.
553 #[inline]
554 pub fn new(content: &'a str) -> Self {
555 Self::from_escaped(escape(content))
556 }
557
558 /// Ensures that all data is owned to extend the object's lifetime if
559 /// necessary.
560 #[inline]
561 pub fn into_owned(self) -> BytesText<'static> {
562 BytesText {
563 content: self.content.into_owned().into(),
564 decoder: self.decoder,
565 }
566 }
567
568 /// Extracts the inner `Cow` from the `BytesText` event container.
569 #[inline]
570 pub fn into_inner(self) -> Cow<'a, [u8]> {
571 self.content
572 }
573
574 /// Converts the event into a borrowed event.
575 #[inline]
576 pub fn borrow(&self) -> BytesText {
577 BytesText {
578 content: Cow::Borrowed(&self.content),
579 decoder: self.decoder,
580 }
581 }
582
583 /// Decodes then unescapes the content of the event.
584 ///
585 /// This will allocate if the value contains any escape sequences or in
586 /// non-UTF-8 encoding.
587 pub fn unescape(&self) -> Result<Cow<'a, str>, Error> {
588 self.unescape_with(resolve_predefined_entity)
589 }
590
591 /// Decodes then unescapes the content of the event with custom entities.
592 ///
593 /// This will allocate if the value contains any escape sequences or in
594 /// non-UTF-8 encoding.
595 pub fn unescape_with<'entity>(
596 &self,
597 resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
598 ) -> Result<Cow<'a, str>, Error> {
599 let decoded = self.decoder.decode_cow(&self.content)?;
600
601 match unescape_with(&decoded, resolve_entity)? {
602 // Because result is borrowed, no replacements was done and we can use original string
603 Cow::Borrowed(_) => Ok(decoded),
604 Cow::Owned(s) => Ok(s.into()),
605 }
606 }
607
608 /// Removes leading XML whitespace bytes from text content.
609 ///
610 /// Returns `true` if content is empty after that
611 pub fn inplace_trim_start(&mut self) -> bool {
612 self.content = trim_cow(
613 replace(&mut self.content, Cow::Borrowed(b"")),
614 trim_xml_start,
615 );
616 self.content.is_empty()
617 }
618
619 /// Removes trailing XML whitespace bytes from text content.
620 ///
621 /// Returns `true` if content is empty after that
622 pub fn inplace_trim_end(&mut self) -> bool {
623 self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
624 self.content.is_empty()
625 }
626}
627
628impl<'a> Debug for BytesText<'a> {
629 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
630 write!(f, "BytesText {{ content: ")?;
631 write_cow_string(f, &self.content)?;
632 write!(f, " }}")
633 }
634}
635
636impl<'a> Deref for BytesText<'a> {
637 type Target = [u8];
638
639 fn deref(&self) -> &[u8] {
640 &self.content
641 }
642}
643
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates a text event from a purely alphanumeric (possibly empty) string.
    ///
    /// Returns `IncorrectFormat` when the generated string contains a
    /// non-alphanumeric character.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let s = <&str>::arbitrary(u)?;
        if !s.chars().all(char::is_alphanumeric) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(s))
    }

    /// Delegates to the size hint of `&str`, the type the text is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
658
659////////////////////////////////////////////////////////////////////////////////////////////////////
660
/// CDATA content contains unescaped data from the reader. If you want to write them as a text,
/// [convert](Self::escape) it to [`BytesText`].
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<![CDATA[` and `]]>`.
///
/// Note, that inner text will not contain `]]>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesCData, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<![CDATA[ CDATA section ]]>");
/// let content = " CDATA section ";
/// let event = BytesCData::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::CData(event.borrow()));
/// // deref coercion of &BytesCData to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
    /// Raw (unescaped) content of the CDATA section, without the
    /// `<![CDATA[` and `]]>` delimiters
    content: Cow<'a, [u8]>,
    /// Encoding in which the `content` is stored inside the event
    decoder: Decoder,
}
689
690impl<'a> BytesCData<'a> {
691 /// Creates a new `BytesCData` from a byte sequence in the specified encoding.
692 #[inline]
693 pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
694 Self {
695 content: content.into(),
696 decoder,
697 }
698 }
699
700 /// Creates a new `BytesCData` from a string.
701 ///
702 /// # Warning
703 ///
704 /// `content` must not contain the `]]>` sequence. You can use
705 /// [`BytesCData::escaped`] to escape the content instead.
706 #[inline]
707 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
708 Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
709 }
710
711 /// Creates an iterator of `BytesCData` from a string.
712 ///
713 /// If a string contains `]]>`, it needs to be split into multiple `CDATA`
714 /// sections, splitting the `]]` and `>` characters, because the CDATA closing
715 /// sequence cannot be escaped. This iterator yields a `BytesCData` instance
716 /// for each of those sections.
717 ///
718 /// # Examples
719 ///
720 /// ```
721 /// # use quick_xml::events::BytesCData;
722 /// # use pretty_assertions::assert_eq;
723 /// let content = "";
724 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
725 /// assert_eq!(cdata, &[BytesCData::new("")]);
726 ///
727 /// let content = "Certain tokens like ]]> can be difficult and <invalid>";
728 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
729 /// assert_eq!(cdata, &[
730 /// BytesCData::new("Certain tokens like ]]"),
731 /// BytesCData::new("> can be difficult and <invalid>"),
732 /// ]);
733 ///
734 /// let content = "foo]]>bar]]>baz]]>quux";
735 /// let cdata = BytesCData::escaped(content).collect::<Vec<_>>();
736 /// assert_eq!(cdata, &[
737 /// BytesCData::new("foo]]"),
738 /// BytesCData::new(">bar]]"),
739 /// BytesCData::new(">baz]]"),
740 /// BytesCData::new(">quux"),
741 /// ]);
742 /// ```
743 #[inline]
744 pub fn escaped(content: &'a str) -> CDataIterator<'a> {
745 CDataIterator {
746 unprocessed: content.as_bytes(),
747 finished: false,
748 }
749 }
750
751 /// Ensures that all data is owned to extend the object's lifetime if
752 /// necessary.
753 #[inline]
754 pub fn into_owned(self) -> BytesCData<'static> {
755 BytesCData {
756 content: self.content.into_owned().into(),
757 decoder: self.decoder,
758 }
759 }
760
761 /// Extracts the inner `Cow` from the `BytesCData` event container.
762 #[inline]
763 pub fn into_inner(self) -> Cow<'a, [u8]> {
764 self.content
765 }
766
767 /// Converts the event into a borrowed event.
768 #[inline]
769 pub fn borrow(&self) -> BytesCData {
770 BytesCData {
771 content: Cow::Borrowed(&self.content),
772 decoder: self.decoder,
773 }
774 }
775
776 /// Converts this CDATA content to an escaped version, that can be written
777 /// as an usual text in XML.
778 ///
779 /// This function performs following replacements:
780 ///
781 /// | Character | Replacement
782 /// |-----------|------------
783 /// | `<` | `<`
784 /// | `>` | `>`
785 /// | `&` | `&`
786 /// | `'` | `'`
787 /// | `"` | `"`
788 pub fn escape(self) -> Result<BytesText<'a>, EncodingError> {
789 let decoded = self.decode()?;
790 Ok(BytesText::wrap(
791 match escape(decoded) {
792 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
793 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
794 },
795 Decoder::utf8(),
796 ))
797 }
798
799 /// Converts this CDATA content to an escaped version, that can be written
800 /// as an usual text in XML.
801 ///
802 /// In XML text content, it is allowed (though not recommended) to leave
803 /// the quote special characters `"` and `'` unescaped.
804 ///
805 /// This function performs following replacements:
806 ///
807 /// | Character | Replacement
808 /// |-----------|------------
809 /// | `<` | `<`
810 /// | `>` | `>`
811 /// | `&` | `&`
812 pub fn partial_escape(self) -> Result<BytesText<'a>, EncodingError> {
813 let decoded = self.decode()?;
814 Ok(BytesText::wrap(
815 match partial_escape(decoded) {
816 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
817 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
818 },
819 Decoder::utf8(),
820 ))
821 }
822
823 /// Converts this CDATA content to an escaped version, that can be written
824 /// as an usual text in XML. This method escapes only those characters that
825 /// must be escaped according to the [specification].
826 ///
827 /// This function performs following replacements:
828 ///
829 /// | Character | Replacement
830 /// |-----------|------------
831 /// | `<` | `<`
832 /// | `&` | `&`
833 ///
834 /// [specification]: https://www.w3.org/TR/xml11/#syntax
835 pub fn minimal_escape(self) -> Result<BytesText<'a>, EncodingError> {
836 let decoded = self.decode()?;
837 Ok(BytesText::wrap(
838 match minimal_escape(decoded) {
839 Cow::Borrowed(escaped) => Cow::Borrowed(escaped.as_bytes()),
840 Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
841 },
842 Decoder::utf8(),
843 ))
844 }
845
846 /// Decodes the raw input byte content of the CDATA section into a string,
847 /// without performing XML entity escaping.
848 ///
849 /// When this event produced by the XML reader, it uses the encoding information
850 /// associated with that reader to interpret the raw bytes contained within this
851 /// CDATA event.
852 pub fn decode(&self) -> Result<Cow<'a, str>, EncodingError> {
853 Ok(self.decoder.decode_cow(&self.content)?)
854 }
855}
856
857impl<'a> Debug for BytesCData<'a> {
858 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
859 write!(f, "BytesCData {{ content: ")?;
860 write_cow_string(f, &self.content)?;
861 write!(f, " }}")
862 }
863}
864
865impl<'a> Deref for BytesCData<'a> {
866 type Target = [u8];
867
868 fn deref(&self) -> &[u8] {
869 &self.content
870 }
871}
872
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates a CDATA event from an arbitrary string (not validated).
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Delegates to the size hint of `&str`, the type the content is generated from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
882
/// Iterator over `CDATA` sections in a string.
///
/// This iterator is created by the [`BytesCData::escaped`] method.
#[derive(Clone)]
pub struct CDataIterator<'a> {
    /// The unprocessed data which should be emitted as `BytesCData` events.
    /// At each iteration, the processed data is cut from this slice.
    unprocessed: &'a [u8],
    /// Set when the last section was emitted; after that the iterator
    /// returns `None`.
    finished: bool,
}
893
894impl<'a> Debug for CDataIterator<'a> {
895 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
896 f.debug_struct("CDataIterator")
897 .field("unprocessed", &Bytes(self.unprocessed))
898 .field("finished", &self.finished)
899 .finish()
900 }
901}
902
903impl<'a> Iterator for CDataIterator<'a> {
904 type Item = BytesCData<'a>;
905
906 fn next(&mut self) -> Option<BytesCData<'a>> {
907 if self.finished {
908 return None;
909 }
910
911 for gt in memchr::memchr_iter(b'>', self.unprocessed) {
912 if self.unprocessed[..gt].ends_with(b"]]") {
913 let (slice, rest) = self.unprocessed.split_at(gt);
914 self.unprocessed = rest;
915 return Some(BytesCData::wrap(slice, Decoder::utf8()));
916 }
917 }
918
919 self.finished = true;
920 Some(BytesCData::wrap(self.unprocessed, Decoder::utf8()))
921 }
922}
923
// Once the `finished` flag is set, `next()` always returns `None`
impl FusedIterator for CDataIterator<'_> {}
925
926////////////////////////////////////////////////////////////////////////////////////////////////////
927
/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
/// returns the content of this event between `<?` and `?>`.
///
/// Note, that inner text will not contain `?>` sequence inside:
///
/// ```
/// # use quick_xml::events::{BytesPI, Event};
/// # use quick_xml::reader::Reader;
/// # use pretty_assertions::assert_eq;
/// let mut reader = Reader::from_str("<?processing instruction >:-<~ ?>");
/// let content = "processing instruction >:-<~ ";
/// let event = BytesPI::new(content);
///
/// assert_eq!(reader.read_event().unwrap(), Event::PI(event.borrow()));
/// // deref coercion of &BytesPI to &[u8]
/// assert_eq!(&event as &[u8], content.as_bytes());
/// // AsRef<[u8]> for &T + deref coercion
/// assert_eq!(event.as_ref(), content.as_bytes());
/// ```
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
    /// Data of the instruction. The name part of the wrapped `BytesStart`
    /// is the target of the instruction, the rest is its content.
    content: BytesStart<'a>,
}
955
956impl<'a> BytesPI<'a> {
957 /// Creates a new `BytesPI` from a byte sequence in the specified encoding.
958 #[inline]
959 pub(crate) const fn wrap(content: &'a [u8], target_len: usize) -> Self {
960 Self {
961 content: BytesStart::wrap(content, target_len),
962 }
963 }
964
965 /// Creates a new `BytesPI` from a string.
966 ///
967 /// # Warning
968 ///
969 /// `content` must not contain the `?>` sequence.
970 #[inline]
971 pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
972 let buf = str_cow_to_bytes(content);
973 let name_len = name_len(&buf);
974 Self {
975 content: BytesStart { buf, name_len },
976 }
977 }
978
979 /// Ensures that all data is owned to extend the object's lifetime if
980 /// necessary.
981 #[inline]
982 pub fn into_owned(self) -> BytesPI<'static> {
983 BytesPI {
984 content: self.content.into_owned().into(),
985 }
986 }
987
988 /// Extracts the inner `Cow` from the `BytesPI` event container.
989 #[inline]
990 pub fn into_inner(self) -> Cow<'a, [u8]> {
991 self.content.buf
992 }
993
994 /// Converts the event into a borrowed event.
995 #[inline]
996 pub fn borrow(&self) -> BytesPI {
997 BytesPI {
998 content: self.content.borrow(),
999 }
1000 }
1001
1002 /// A target used to identify the application to which the instruction is directed.
1003 ///
1004 /// # Example
1005 ///
1006 /// ```
1007 /// # use pretty_assertions::assert_eq;
1008 /// use quick_xml::events::BytesPI;
1009 ///
1010 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1011 /// assert_eq!(instruction.target(), b"xml-stylesheet");
1012 /// ```
1013 #[inline]
1014 pub fn target(&self) -> &[u8] {
1015 self.content.name().0
1016 }
1017
1018 /// Content of the processing instruction. Contains everything between target
1019 /// name and the end of the instruction. A direct consequence is that the first
1020 /// character is always a space character.
1021 ///
1022 /// # Example
1023 ///
1024 /// ```
1025 /// # use pretty_assertions::assert_eq;
1026 /// use quick_xml::events::BytesPI;
1027 ///
1028 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1029 /// assert_eq!(instruction.content(), br#" href="style.css""#);
1030 /// ```
1031 #[inline]
1032 pub fn content(&self) -> &[u8] {
1033 self.content.attributes_raw()
1034 }
1035
1036 /// A view of the processing instructions' content as a list of key-value pairs.
1037 ///
1038 /// Key-value pairs are used in some processing instructions, for example in
1039 /// `<?xml-stylesheet?>`.
1040 ///
1041 /// Returned iterator does not validate attribute values as may required by
1042 /// target's rules. For example, it doesn't check that substring `?>` is not
1043 /// present in the attribute value. That shouldn't be the problem when event
1044 /// is produced by the reader, because reader detects end of processing instruction
1045 /// by the first `?>` sequence, as required by the specification, and therefore
1046 /// this sequence cannot appear inside it.
1047 ///
1048 /// # Example
1049 ///
1050 /// ```
1051 /// # use pretty_assertions::assert_eq;
1052 /// use std::borrow::Cow;
1053 /// use quick_xml::events::attributes::Attribute;
1054 /// use quick_xml::events::BytesPI;
1055 /// use quick_xml::name::QName;
1056 ///
1057 /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
1058 /// for attr in instruction.attributes() {
1059 /// assert_eq!(attr, Ok(Attribute {
1060 /// key: QName(b"href"),
1061 /// value: Cow::Borrowed(b"style.css"),
1062 /// }));
1063 /// }
1064 /// ```
1065 #[inline]
1066 pub fn attributes(&self) -> Attributes {
1067 self.content.attributes()
1068 }
1069}
1070
1071impl<'a> Debug for BytesPI<'a> {
1072 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
1073 write!(f, "BytesPI {{ content: ")?;
1074 write_cow_string(f, &self.content.buf)?;
1075 write!(f, " }}")
1076 }
1077}
1078
1079impl<'a> Deref for BytesPI<'a> {
1080 type Target = [u8];
1081
1082 fn deref(&self) -> &[u8] {
1083 &self.content
1084 }
1085}
1086
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Builds a processing instruction from one arbitrary string slice.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(<&str>::arbitrary(u)?))
    }

    /// Only a single `&str` is consumed, so its size hint is reused as is.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1096
1097////////////////////////////////////////////////////////////////////////////////////////////////////
1098
1099/// An XML declaration (`Event::Decl`).
1100///
1101/// [W3C XML 1.1 Prolog and Document Type Declaration](http://w3.org/TR/xml11/#sec-prolog-dtd)
1102///
1103/// This event implements `Deref<Target = [u8]>`. The `deref()` implementation
1104/// returns the content of this event between `<?` and `?>`.
1105///
1106/// Note, that inner text will not contain `?>` sequence inside:
1107///
1108/// ```
1109/// # use quick_xml::events::{BytesDecl, BytesStart, Event};
1110/// # use quick_xml::reader::Reader;
1111/// # use pretty_assertions::assert_eq;
1112/// let mut reader = Reader::from_str("<?xml version = '1.0' ?>");
1113/// let content = "xml version = '1.0' ";
1114/// let event = BytesDecl::from_start(BytesStart::from_content(content, 3));
1115///
1116/// assert_eq!(reader.read_event().unwrap(), Event::Decl(event.borrow()));
1117/// // deref coercion of &BytesDecl to &[u8]
1118/// assert_eq!(&event as &[u8], content.as_bytes());
1119/// // AsRef<[u8]> for &T + deref coercion
1120/// assert_eq!(event.as_ref(), content.as_bytes());
1121/// ```
1122#[derive(Clone, Debug, Eq, PartialEq)]
1123pub struct BytesDecl<'a> {
1124 content: BytesStart<'a>,
1125}
1126
1127impl<'a> BytesDecl<'a> {
1128 /// Constructs a new `XmlDecl` from the (mandatory) _version_ (should be `1.0` or `1.1`),
1129 /// the optional _encoding_ (e.g., `UTF-8`) and the optional _standalone_ (`yes` or `no`)
1130 /// attribute.
1131 ///
1132 /// Does not escape any of its inputs. Always uses double quotes to wrap the attribute values.
1133 /// The caller is responsible for escaping attribute values. Shouldn't usually be relevant since
1134 /// the double quote character is not allowed in any of the attribute values.
1135 pub fn new(
1136 version: &str,
1137 encoding: Option<&str>,
1138 standalone: Option<&str>,
1139 ) -> BytesDecl<'static> {
1140 // Compute length of the buffer based on supplied attributes
1141 // ' encoding=""' => 12
1142 let encoding_attr_len = if let Some(xs) = encoding {
1143 12 + xs.len()
1144 } else {
1145 0
1146 };
1147 // ' standalone=""' => 14
1148 let standalone_attr_len = if let Some(xs) = standalone {
1149 14 + xs.len()
1150 } else {
1151 0
1152 };
1153 // 'xml version=""' => 14
1154 let mut buf = String::with_capacity(14 + encoding_attr_len + standalone_attr_len);
1155
1156 buf.push_str("xml version=\"");
1157 buf.push_str(version);
1158
1159 if let Some(encoding_val) = encoding {
1160 buf.push_str("\" encoding=\"");
1161 buf.push_str(encoding_val);
1162 }
1163
1164 if let Some(standalone_val) = standalone {
1165 buf.push_str("\" standalone=\"");
1166 buf.push_str(standalone_val);
1167 }
1168 buf.push('"');
1169
1170 BytesDecl {
1171 content: BytesStart::from_content(buf, 3),
1172 }
1173 }
1174
1175 /// Creates a `BytesDecl` from a `BytesStart`
1176 pub const fn from_start(start: BytesStart<'a>) -> Self {
1177 Self { content: start }
1178 }
1179
1180 /// Gets xml version, excluding quotes (`'` or `"`).
1181 ///
1182 /// According to the [grammar], the version *must* be the first thing in the declaration.
1183 /// This method tries to extract the first thing in the declaration and return it.
1184 /// In case of multiple attributes value of the first one is returned.
1185 ///
1186 /// If version is missed in the declaration, or the first thing is not a version,
1187 /// [`IllFormedError::MissingDeclVersion`] will be returned.
1188 ///
1189 /// # Examples
1190 ///
1191 /// ```
1192 /// use quick_xml::errors::{Error, IllFormedError};
1193 /// use quick_xml::events::{BytesDecl, BytesStart};
1194 ///
1195 /// // <?xml version='1.1'?>
1196 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1197 /// assert_eq!(decl.version().unwrap(), b"1.1".as_ref());
1198 ///
1199 /// // <?xml version='1.0' version='1.1'?>
1200 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
1201 /// assert_eq!(decl.version().unwrap(), b"1.0".as_ref());
1202 ///
1203 /// // <?xml encoding='utf-8'?>
1204 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1205 /// match decl.version() {
1206 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1207 /// _ => assert!(false),
1208 /// }
1209 ///
1210 /// // <?xml encoding='utf-8' version='1.1'?>
1211 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
1212 /// match decl.version() {
1213 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
1214 /// _ => assert!(false),
1215 /// }
1216 ///
1217 /// // <?xml?>
1218 /// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
1219 /// match decl.version() {
1220 /// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
1221 /// _ => assert!(false),
1222 /// }
1223 /// ```
1224 ///
1225 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1226 pub fn version(&self) -> Result<Cow<[u8]>, Error> {
1227 // The version *must* be the first thing in the declaration.
1228 match self.content.attributes().with_checks(false).next() {
1229 Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
1230 // first attribute was not "version"
1231 Some(Ok(a)) => {
1232 let found = from_utf8(a.key.as_ref())
1233 .map_err(|_| IllFormedError::MissingDeclVersion(None))?
1234 .to_string();
1235 Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
1236 found,
1237 ))))
1238 }
1239 // error parsing attributes
1240 Some(Err(e)) => Err(e.into()),
1241 // no attributes
1242 None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
1243 }
1244 }
1245
1246 /// Gets xml encoding, excluding quotes (`'` or `"`).
1247 ///
1248 /// Although according to the [grammar] encoding must appear before `"standalone"`
1249 /// and after `"version"`, this method does not check that. The first occurrence
1250 /// of the attribute will be returned even if there are several. Also, method does
1251 /// not restrict symbols that can forming the encoding, so the returned encoding
1252 /// name may not correspond to the grammar.
1253 ///
1254 /// # Examples
1255 ///
1256 /// ```
1257 /// use std::borrow::Cow;
1258 /// use quick_xml::Error;
1259 /// use quick_xml::events::{BytesDecl, BytesStart};
1260 ///
1261 /// // <?xml version='1.1'?>
1262 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1263 /// assert!(decl.encoding().is_none());
1264 ///
1265 /// // <?xml encoding='utf-8'?>
1266 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
1267 /// match decl.encoding() {
1268 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"utf-8"),
1269 /// _ => assert!(false),
1270 /// }
1271 ///
1272 /// // <?xml encoding='something_WRONG' encoding='utf-8'?>
1273 /// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='something_WRONG' encoding='utf-8'", 0));
1274 /// match decl.encoding() {
1275 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"something_WRONG"),
1276 /// _ => assert!(false),
1277 /// }
1278 /// ```
1279 ///
1280 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1281 pub fn encoding(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1282 self.content
1283 .try_get_attribute("encoding")
1284 .map(|a| a.map(|a| a.value))
1285 .transpose()
1286 }
1287
1288 /// Gets xml standalone, excluding quotes (`'` or `"`).
1289 ///
1290 /// Although according to the [grammar] standalone flag must appear after `"version"`
1291 /// and `"encoding"`, this method does not check that. The first occurrence of the
1292 /// attribute will be returned even if there are several. Also, method does not
1293 /// restrict symbols that can forming the value, so the returned flag name may not
1294 /// correspond to the grammar.
1295 ///
1296 /// # Examples
1297 ///
1298 /// ```
1299 /// use std::borrow::Cow;
1300 /// use quick_xml::Error;
1301 /// use quick_xml::events::{BytesDecl, BytesStart};
1302 ///
1303 /// // <?xml version='1.1'?>
1304 /// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
1305 /// assert!(decl.standalone().is_none());
1306 ///
1307 /// // <?xml standalone='yes'?>
1308 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='yes'", 0));
1309 /// match decl.standalone() {
1310 /// Some(Ok(Cow::Borrowed(encoding))) => assert_eq!(encoding, b"yes"),
1311 /// _ => assert!(false),
1312 /// }
1313 ///
1314 /// // <?xml standalone='something_WRONG' encoding='utf-8'?>
1315 /// let decl = BytesDecl::from_start(BytesStart::from_content(" standalone='something_WRONG' encoding='utf-8'", 0));
1316 /// match decl.standalone() {
1317 /// Some(Ok(Cow::Borrowed(flag))) => assert_eq!(flag, b"something_WRONG"),
1318 /// _ => assert!(false),
1319 /// }
1320 /// ```
1321 ///
1322 /// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
1323 pub fn standalone(&self) -> Option<Result<Cow<[u8]>, AttrError>> {
1324 self.content
1325 .try_get_attribute("standalone")
1326 .map(|a| a.map(|a| a.value))
1327 .transpose()
1328 }
1329
1330 /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
1331 /// algorithm.
1332 ///
1333 /// If encoding in not known, or `encoding` key was not found, returns `None`.
1334 /// In case of duplicated `encoding` key, encoding, corresponding to the first
1335 /// one, is returned.
1336 #[cfg(feature = "encoding")]
1337 pub fn encoder(&self) -> Option<&'static Encoding> {
1338 self.encoding()
1339 .and_then(|e| e.ok())
1340 .and_then(|e| Encoding::for_label(&e))
1341 }
1342
1343 /// Converts the event into an owned event.
1344 pub fn into_owned(self) -> BytesDecl<'static> {
1345 BytesDecl {
1346 content: self.content.into_owned(),
1347 }
1348 }
1349
1350 /// Converts the event into a borrowed event.
1351 #[inline]
1352 pub fn borrow(&self) -> BytesDecl {
1353 BytesDecl {
1354 content: self.content.borrow(),
1355 }
1356 }
1357}
1358
1359impl<'a> Deref for BytesDecl<'a> {
1360 type Target = [u8];
1361
1362 fn deref(&self) -> &[u8] {
1363 &self.content
1364 }
1365}
1366
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Builds a declaration from an arbitrary version string and arbitrary
    /// optional encoding and standalone strings.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        Ok(Self::new(
            <&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
            Option::<&str>::arbitrary(u)?,
        ))
    }

    /// Reports the size hint of a single `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
1381
1382////////////////////////////////////////////////////////////////////////////////////////////////////
1383
1384/// Event emitted by [`Reader::read_event_into`].
1385///
1386/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
1387#[derive(Clone, Debug, Eq, PartialEq)]
1388#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
1389pub enum Event<'a> {
1390 /// Start tag (with attributes) `<tag attr="value">`.
1391 Start(BytesStart<'a>),
1392 /// End tag `</tag>`.
1393 End(BytesEnd<'a>),
1394 /// Empty element tag (with attributes) `<tag attr="value" />`.
1395 Empty(BytesStart<'a>),
1396 /// Escaped character data between tags.
1397 Text(BytesText<'a>),
1398 /// Unescaped character data stored in `<![CDATA[...]]>`.
1399 CData(BytesCData<'a>),
1400 /// Comment `<!-- ... -->`.
1401 Comment(BytesText<'a>),
1402 /// XML declaration `<?xml ...?>`.
1403 Decl(BytesDecl<'a>),
1404 /// Processing instruction `<?...?>`.
1405 PI(BytesPI<'a>),
1406 /// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
1407 DocType(BytesText<'a>),
1408 /// End of XML document.
1409 Eof,
1410}
1411
1412impl<'a> Event<'a> {
1413 /// Converts the event to an owned version, untied to the lifetime of
1414 /// buffer used when reading but incurring a new, separate allocation.
1415 pub fn into_owned(self) -> Event<'static> {
1416 match self {
1417 Event::Start(e) => Event::Start(e.into_owned()),
1418 Event::End(e) => Event::End(e.into_owned()),
1419 Event::Empty(e) => Event::Empty(e.into_owned()),
1420 Event::Text(e) => Event::Text(e.into_owned()),
1421 Event::Comment(e) => Event::Comment(e.into_owned()),
1422 Event::CData(e) => Event::CData(e.into_owned()),
1423 Event::Decl(e) => Event::Decl(e.into_owned()),
1424 Event::PI(e) => Event::PI(e.into_owned()),
1425 Event::DocType(e) => Event::DocType(e.into_owned()),
1426 Event::Eof => Event::Eof,
1427 }
1428 }
1429
1430 /// Converts the event into a borrowed event.
1431 #[inline]
1432 pub fn borrow(&self) -> Event {
1433 match self {
1434 Event::Start(e) => Event::Start(e.borrow()),
1435 Event::End(e) => Event::End(e.borrow()),
1436 Event::Empty(e) => Event::Empty(e.borrow()),
1437 Event::Text(e) => Event::Text(e.borrow()),
1438 Event::Comment(e) => Event::Comment(e.borrow()),
1439 Event::CData(e) => Event::CData(e.borrow()),
1440 Event::Decl(e) => Event::Decl(e.borrow()),
1441 Event::PI(e) => Event::PI(e.borrow()),
1442 Event::DocType(e) => Event::DocType(e.borrow()),
1443 Event::Eof => Event::Eof,
1444 }
1445 }
1446}
1447
1448impl<'a> Deref for Event<'a> {
1449 type Target = [u8];
1450
1451 fn deref(&self) -> &[u8] {
1452 match *self {
1453 Event::Start(ref e) | Event::Empty(ref e) => e,
1454 Event::End(ref e) => e,
1455 Event::Text(ref e) => e,
1456 Event::Decl(ref e) => e,
1457 Event::PI(ref e) => e,
1458 Event::CData(ref e) => e,
1459 Event::Comment(ref e) => e,
1460 Event::DocType(ref e) => e,
1461 Event::Eof => &[],
1462 }
1463 }
1464}
1465
1466impl<'a> AsRef<Event<'a>> for Event<'a> {
1467 fn as_ref(&self) -> &Event<'a> {
1468 self
1469 }
1470}
1471
1472////////////////////////////////////////////////////////////////////////////////////////////////////
1473
/// Reinterprets a (possibly owned) string as its UTF-8 bytes.
///
/// No data is copied: a borrowed string yields a borrowed byte slice and an
/// owned string hands its buffer over to the returned vector.
#[inline]
fn str_cow_to_bytes<'a, C: Into<Cow<'a, str>>>(content: C) -> Cow<'a, [u8]> {
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
    }
}
1481
/// Applies `trim` to the bytes of `value`, keeping the kind of the `Cow`.
///
/// A borrowed value stays borrowed and simply wraps the slice returned by
/// `trim`. An owned value stays owned: its vector is returned untouched when
/// `trim` yields a slice of the same length, otherwise the trimmed bytes are
/// copied into a new vector.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was cut off, so the existing allocation is reused.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
1497
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;

    /// A freshly created start tag consists of its name only.
    #[test]
    fn bytestart_create() {
        let tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }

    /// Renaming a tag keeps the attributes that were pushed before.
    #[test]
    fn bytestart_set_name() {
        let mut tag = BytesStart::new("test");
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
        assert_eq!(tag.attributes_raw(), b"");

        // `test` plus ` x="a"`
        tag.push_attribute(("x", "a"));
        assert_eq!(tag.len(), 10);
        assert_eq!(tag.attributes_raw(), b" x=\"a\"");

        // `g` plus ` x="a"`
        tag.set_name(b"g");
        assert_eq!(tag.len(), 7);
        assert_eq!(tag.name(), QName(b"g"));
    }

    /// Clearing the attributes leaves only the name behind.
    #[test]
    fn bytestart_clear_attributes() {
        let mut tag = BytesStart::new("test");
        tag.push_attribute(("x", "y\"z"));
        tag.push_attribute(("x", "y\"z"));

        tag.clear_attributes();
        assert!(tag.attributes().next().is_none());
        assert_eq!(tag.len(), 4);
        assert_eq!(tag.name(), QName(b"test"));
    }
}