quick_xml/
writer.rs

1//! Contains high-level interface for an events-based XML emitter.
2
3use std::borrow::Cow;
4use std::io::{self, Write};
5
6use crate::encoding::UTF8_BOM;
7use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};
8
9#[cfg(feature = "async-tokio")]
10mod async_tokio;
11
// Serde support, used by `Writer::write_serializable`.
13#[cfg(feature = "serialize")]
14use {crate::se::SeError, serde::Serialize};
15
16/// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor.
17///
18/// # Examples
19///
20/// ```
21/// # use pretty_assertions::assert_eq;
22/// use quick_xml::events::{Event, BytesEnd, BytesStart};
23/// use quick_xml::reader::Reader;
24/// use quick_xml::writer::Writer;
25/// use std::io::Cursor;
26///
27/// let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
28/// let mut reader = Reader::from_str(xml);
29/// let mut writer = Writer::new(Cursor::new(Vec::new()));
30/// loop {
31///     match reader.read_event() {
32///         Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
33///
34///             // crates a new element ... alternatively we could reuse `e` by calling
35///             // `e.into_owned()`
36///             let mut elem = BytesStart::new("my_elem");
37///
38///             // collect existing attributes
39///             elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
40///
41///             // copy existing attributes, adds a new my-key="some value" attribute
42///             elem.push_attribute(("my-key", "some value"));
43///
44///             // writes the event to the writer
45///             assert!(writer.write_event(Event::Start(elem)).is_ok());
46///         },
47///         Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
48///             assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok());
49///         },
50///         Ok(Event::Eof) => break,
51///         // we can either move or borrow the event to write, depending on your use-case
52///         Ok(e) => assert!(writer.write_event(e.borrow()).is_ok()),
53///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
54///     }
55/// }
56///
57/// let result = writer.into_inner().into_inner();
58/// let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
59/// assert_eq!(result, expected.as_bytes());
60/// ```
61#[derive(Clone)]
62pub struct Writer<W> {
63    /// underlying writer
64    writer: W,
65    indent: Option<Indentation>,
66}
67
68impl<W> Writer<W> {
69    /// Creates a `Writer` from a generic writer.
70    pub const fn new(inner: W) -> Writer<W> {
71        Writer {
72            writer: inner,
73            indent: None,
74        }
75    }
76
77    /// Creates a `Writer` with configured indents from a generic writer.
78    pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer<W> {
79        Writer {
80            writer: inner,
81            indent: Some(Indentation::new(indent_char, indent_size)),
82        }
83    }
84
85    /// Consumes this `Writer`, returning the underlying writer.
86    pub fn into_inner(self) -> W {
87        self.writer
88    }
89
90    /// Get a mutable reference to the underlying writer.
91    pub fn get_mut(&mut self) -> &mut W {
92        &mut self.writer
93    }
94
95    /// Get a reference to the underlying writer.
96    pub const fn get_ref(&self) -> &W {
97        &self.writer
98    }
99
100    /// Provides a simple, high-level API for writing XML elements.
101    ///
102    /// Returns an [`ElementWriter`] that simplifies setting attributes and writing
103    /// content inside the element.
104    ///
105    /// # Example
106    ///
107    /// ```
108    /// # use quick_xml::Result;
109    /// # fn main() -> Result<()> {
110    /// use quick_xml::events::{BytesStart, BytesText, Event};
111    /// use quick_xml::writer::Writer;
112    /// use quick_xml::Error;
113    /// use std::io::Cursor;
114    ///
115    /// let mut writer = Writer::new(Cursor::new(Vec::new()));
116    ///
117    /// // writes <tag attr1="value1"/>
118    /// writer.create_element("tag")
119    ///     .with_attribute(("attr1", "value1"))  // chain `with_attribute()` calls to add many attributes
120    ///     .write_empty()?;
121    ///
122    /// // writes <tag attr1="value1" attr2="value2">with some text inside</tag>
123    /// writer.create_element("tag")
124    ///     .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter())  // or add attributes from an iterator
125    ///     .write_text_content(BytesText::new("with some text inside"))?;
126    ///
127    /// // writes <tag><fruit quantity="0">apple</fruit><fruit quantity="1">orange</fruit></tag>
128    /// writer.create_element("tag")
129    ///     // We need to provide error type, because it is not named somewhere explicitly
130    ///     .write_inner_content(|writer| {
131    ///         let fruits = ["apple", "orange"];
132    ///         for (quant, item) in fruits.iter().enumerate() {
133    ///             writer
134    ///                 .create_element("fruit")
135    ///                 .with_attribute(("quantity", quant.to_string().as_str()))
136    ///                 .write_text_content(BytesText::new(item))?;
137    ///         }
138    ///         Ok(())
139    ///     })?;
140    /// # Ok(())
141    /// # }
142    /// ```
143    #[must_use]
144    pub fn create_element<'a, N>(&'a mut self, name: N) -> ElementWriter<'a, W>
145    where
146        N: Into<Cow<'a, str>>,
147    {
148        ElementWriter {
149            writer: self,
150            start_tag: BytesStart::new(name),
151            state: AttributeIndent::NoneAttributesWritten,
152            spaces: Vec::new(),
153        }
154    }
155}
156
157impl<W: Write> Writer<W> {
158    /// Write a [Byte-Order-Mark] character to the document.
159    ///
160    /// # Example
161    ///
162    /// ```rust
163    /// # use quick_xml::Result;
164    /// # fn main() -> Result<()> {
165    /// use quick_xml::events::{BytesStart, BytesText, Event};
166    /// use quick_xml::writer::Writer;
167    /// use quick_xml::Error;
168    /// use std::io::Cursor;
169    ///
170    /// let mut buffer = Vec::new();
171    /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
172    ///
173    /// writer.write_bom()?;
174    /// writer
175    ///     .create_element("empty")
176    ///     .with_attribute(("attr1", "value1"))
177    ///     .write_empty()
178    ///     .expect("failure");
179    ///
180    /// assert_eq!(
181    ///     std::str::from_utf8(&buffer).unwrap(),
182    ///     "\u{FEFF}<empty attr1=\"value1\"/>"
183    /// );
184    /// # Ok(())
185    /// # }
186    /// ```
187    /// [Byte-Order-Mark]: https://unicode.org/faq/utf_bom.html#BOM
188    pub fn write_bom(&mut self) -> io::Result<()> {
189        self.write(UTF8_BOM)
190    }
191
192    /// Writes the given event to the underlying writer.
193    pub fn write_event<'a, E: Into<Event<'a>>>(&mut self, event: E) -> io::Result<()> {
194        let mut next_should_line_break = true;
195        let result = match event.into() {
196            Event::Start(e) => {
197                let result = self.write_wrapped(b"<", &e, b">");
198                if let Some(i) = self.indent.as_mut() {
199                    i.grow();
200                }
201                result
202            }
203            Event::End(e) => {
204                if let Some(i) = self.indent.as_mut() {
205                    i.shrink();
206                }
207                self.write_wrapped(b"</", &e, b">")
208            }
209            Event::Empty(e) => self.write_wrapped(b"<", &e, b"/>"),
210            Event::Text(e) => {
211                next_should_line_break = false;
212                self.write(&e)
213            }
214            Event::Comment(e) => self.write_wrapped(b"<!--", &e, b"-->"),
215            Event::CData(e) => {
216                next_should_line_break = false;
217                self.write(b"<![CDATA[")?;
218                self.write(&e)?;
219                self.write(b"]]>")
220            }
221            Event::Decl(e) => self.write_wrapped(b"<?", &e, b"?>"),
222            Event::PI(e) => self.write_wrapped(b"<?", &e, b"?>"),
223            Event::DocType(e) => self.write_wrapped(b"<!DOCTYPE ", &e, b">"),
224            Event::GeneralRef(e) => self.write_wrapped(b"&", &e, b";"),
225            Event::Eof => Ok(()),
226        };
227        if let Some(i) = self.indent.as_mut() {
228            i.should_line_break = next_should_line_break;
229        }
230        result
231    }
232
233    /// Writes bytes
234    #[inline]
235    pub(crate) fn write(&mut self, value: &[u8]) -> io::Result<()> {
236        self.writer.write_all(value).map_err(Into::into)
237    }
238
239    #[inline]
240    fn write_wrapped(&mut self, before: &[u8], value: &[u8], after: &[u8]) -> io::Result<()> {
241        if let Some(ref i) = self.indent {
242            if i.should_line_break {
243                self.writer.write_all(b"\n")?;
244                self.writer.write_all(i.current())?;
245            }
246        }
247        self.write(before)?;
248        self.write(value)?;
249        self.write(after)?;
250        Ok(())
251    }
252
253    /// Manually write a newline and indentation at the proper level.
254    ///
255    /// This can be used when the heuristic to line break and indent after any
256    /// [`Event`] apart from [`Text`] fails such as when a [`Start`] occurs directly
257    /// after [`Text`].
258    ///
259    /// This method will do nothing if `Writer` was not constructed with [`new_with_indent`].
260    ///
261    /// [`Text`]: Event::Text
262    /// [`Start`]: Event::Start
263    /// [`new_with_indent`]: Self::new_with_indent
264    pub fn write_indent(&mut self) -> io::Result<()> {
265        if let Some(ref i) = self.indent {
266            self.writer.write_all(b"\n")?;
267            self.writer.write_all(i.current())?;
268        }
269        Ok(())
270    }
271
272    /// Write an arbitrary serializable type
273    ///
274    /// Note: If you are attempting to write XML in a non-UTF-8 encoding, this may not
275    /// be safe to use. Rust basic types assume UTF-8 encodings.
276    ///
277    /// ```rust
278    /// # use pretty_assertions::assert_eq;
279    /// # use serde::Serialize;
280    /// # use quick_xml::events::{BytesStart, Event};
281    /// # use quick_xml::writer::Writer;
282    /// # use quick_xml::se::SeError;
283    /// # fn main() -> Result<(), SeError> {
284    /// #[derive(Debug, PartialEq, Serialize)]
285    /// struct MyData {
286    ///     question: String,
287    ///     answer: u32,
288    /// }
289    ///
290    /// let data = MyData {
291    ///     question: "The Ultimate Question of Life, the Universe, and Everything".into(),
292    ///     answer: 42,
293    /// };
294    ///
295    /// let mut buffer = Vec::new();
296    /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
297    ///
298    /// let start = BytesStart::new("root");
299    /// let end = start.to_end();
300    ///
301    /// writer.write_event(Event::Start(start.clone()))?;
302    /// writer.write_serializable("my_data", &data)?;
303    /// writer.write_event(Event::End(end))?;
304    ///
305    /// assert_eq!(
306    ///     std::str::from_utf8(&buffer)?,
307    ///     r#"<root>
308    ///     <my_data>
309    ///         <question>The Ultimate Question of Life, the Universe, and Everything</question>
310    ///         <answer>42</answer>
311    ///     </my_data>
312    /// </root>"#
313    /// );
314    /// # Ok(())
315    /// # }
316    /// ```
317    #[cfg(feature = "serialize")]
318    pub fn write_serializable<T: Serialize>(
319        &mut self,
320        tag_name: &str,
321        content: &T,
322    ) -> Result<(), SeError> {
323        use crate::se::{Indent, Serializer};
324
325        self.write_indent()?;
326        let mut fmt = ToFmtWrite(&mut self.writer);
327        let mut serializer = Serializer::with_root(&mut fmt, Some(tag_name))?;
328
329        if let Some(indent) = &mut self.indent {
330            serializer.set_indent(Indent::Borrow(indent));
331        }
332
333        content.serialize(serializer)?;
334
335        Ok(())
336    }
337}
338
/// State machine that tracks how the attributes of an element under
/// construction have to be indented.
///
/// The `Write*` states mean that `.new_line()` was just called and the indent
/// has to be written in front of the next attribute; their counterparts only
/// remember the indent for a possible later `.new_line()`.
///
/// ```mermaid
/// stateDiagram-v2
///     [*] --> NoneAttributesWritten
///     NoneAttributesWritten --> Spaces : .with_attribute()
///     NoneAttributesWritten --> WriteConfigured : .new_line()
///
///     Spaces --> Spaces : .with_attribute()
///     Spaces --> WriteSpaces : .new_line()
///
///     WriteSpaces --> Spaces : .with_attribute()
///     WriteSpaces --> WriteSpaces : .new_line()
///
///     Configured --> Configured : .with_attribute()
///     Configured --> WriteConfigured : .new_line()
///
///     WriteConfigured --> Configured : .with_attribute()
///     WriteConfigured --> WriteConfigured : .new_line()
/// ```
#[derive(Debug)]
enum AttributeIndent {
    /// Initial state: the `ElementWriter` was just created, no attribute was
    /// written and no new line was requested yet
    NoneAttributesWritten,
    /// Remembers the count of spaces to indent with, should `new_line()` be called
    Spaces(usize),
    /// The next `with_attribute()` must write this count of spaces before the attribute
    WriteSpaces(usize),
    /// Remembers the count of indent characters to use, should `new_line()` be called
    Configured(usize),
    /// The next `with_attribute()` must write this count of indent characters
    /// before the attribute
    WriteConfigured(usize),
}
372
373/// A struct to write an element. Contains methods to add attributes and inner
374/// elements to the element
375pub struct ElementWriter<'a, W> {
376    writer: &'a mut Writer<W>,
377    start_tag: BytesStart<'a>,
378    state: AttributeIndent,
379    /// Contains spaces used to write space indents of attributes
380    spaces: Vec<u8>,
381}
382
383impl<'a, W> ElementWriter<'a, W> {
384    /// Adds an attribute to this element.
385    pub fn with_attribute<'b, I>(mut self, attr: I) -> Self
386    where
387        I: Into<Attribute<'b>>,
388    {
389        self.write_attr(attr.into());
390        self
391    }
392
393    /// Add additional attributes to this element using an iterator.
394    ///
395    /// The yielded items must be convertible to [`Attribute`] using `Into`.
396    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
397    where
398        I: IntoIterator,
399        I::Item: Into<Attribute<'b>>,
400    {
401        let mut iter = attributes.into_iter();
402        if let Some(attr) = iter.next() {
403            self.write_attr(attr.into());
404            self.start_tag.extend_attributes(iter);
405        }
406        self
407    }
408
409    /// Push a new line inside an element between attributes. Note, that this
410    /// method does nothing if [`Writer`] was created without indentation support.
411    ///
412    /// # Examples
413    ///
414    /// The following code
415    ///
416    /// ```
417    /// # use quick_xml::writer::Writer;
418    /// let mut buffer = Vec::new();
419    /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 2);
420    /// writer
421    ///   .create_element("element")
422    ///     //.new_line() (1)
423    ///     .with_attribute(("first", "1"))
424    ///     .with_attribute(("second", "2"))
425    ///     .new_line()
426    ///     .with_attributes([
427    ///         ("third", "3"),
428    ///         ("fourth", "4"),
429    ///     ])
430    ///     //.new_line() (2)
431    ///     .write_empty();
432    /// ```
433    /// will produce the following XMLs:
434    /// ```xml
435    /// <!-- result of the code above. Spaces always is used -->
436    /// <element first="1" second="2"
437    ///          third="3" fourth="4"/>
438    ///
439    /// <!-- if uncomment only (1) - indent depends on indentation
440    ///      settings - 2 spaces here -->
441    /// <element
442    ///   first="1" second="2"
443    ///   third="3" fourth="4"/>
444    ///
445    /// <!-- if uncomment only (2). Spaces always is used  -->
446    /// <element first="1" second="2"
447    ///          third="3" fourth="4"
448    /// />
449    /// ```
450    pub fn new_line(mut self) -> Self {
451        if let Some(i) = self.writer.indent.as_mut() {
452            match self.state {
453                // .new_line() called just after .create_element().
454                // Use element indent to additionally indent attributes
455                AttributeIndent::NoneAttributesWritten => {
456                    self.state = AttributeIndent::WriteConfigured(i.indent_size)
457                }
458
459                AttributeIndent::WriteSpaces(_) => {}
460                // .new_line() called when .with_attribute() was called at least once.
461                // The spaces should be used to indent
462                // Plan saved indent
463                AttributeIndent::Spaces(indent) => {
464                    self.state = AttributeIndent::WriteSpaces(indent)
465                }
466
467                AttributeIndent::WriteConfigured(_) => {}
468                // .new_line() called when .with_attribute() was called at least once.
469                // The configured indent characters should be used to indent
470                // Plan saved indent
471                AttributeIndent::Configured(indent) => {
472                    self.state = AttributeIndent::WriteConfigured(indent)
473                }
474            }
475            self.start_tag.push_newline();
476        };
477        self
478    }
479
480    /// Writes attribute and maintain indentation state
481    fn write_attr<'b>(&mut self, attr: Attribute<'b>) {
482        if let Some(i) = self.writer.indent.as_mut() {
483            // Save the indent that we should use next time when .new_line() be called
484            self.state = match self.state {
485                // Neither .new_line() or .with_attribute() yet called
486                // If newline inside attributes will be requested, we should indent them
487                // by the length of tag name and +1 for `<` and +1 for one space
488                AttributeIndent::NoneAttributesWritten => {
489                    self.start_tag.push_attribute(attr);
490                    AttributeIndent::Spaces(self.start_tag.name().as_ref().len() + 2)
491                }
492
493                // Indent was requested by previous call to .new_line(), write it
494                // New line was already written
495                AttributeIndent::WriteSpaces(indent) => {
496                    if self.spaces.len() < indent {
497                        self.spaces.resize(indent, b' ');
498                    }
499                    self.start_tag.push_indent(&self.spaces[..indent]);
500                    self.start_tag.push_attr(attr.into());
501                    AttributeIndent::Spaces(indent)
502                }
503                // .new_line() was not called, but .with_attribute() was.
504                // use the previously calculated indent
505                AttributeIndent::Spaces(indent) => {
506                    self.start_tag.push_attribute(attr);
507                    AttributeIndent::Spaces(indent)
508                }
509
510                // Indent was requested by previous call to .new_line(), write it
511                // New line was already written
512                AttributeIndent::WriteConfigured(indent) => {
513                    self.start_tag.push_indent(i.additional(indent));
514                    self.start_tag.push_attr(attr.into());
515                    AttributeIndent::Configured(indent)
516                }
517                // .new_line() was not called, but .with_attribute() was.
518                // use the previously calculated indent
519                AttributeIndent::Configured(indent) => {
520                    self.start_tag.push_attribute(attr);
521                    AttributeIndent::Configured(indent)
522                }
523            };
524        } else {
525            self.start_tag.push_attribute(attr);
526        }
527    }
528}
529
530impl<'a, W: Write> ElementWriter<'a, W> {
531    /// Write some text inside the current element.
532    pub fn write_text_content(self, text: BytesText) -> io::Result<&'a mut Writer<W>> {
533        self.writer
534            .write_event(Event::Start(self.start_tag.borrow()))?;
535        self.writer.write_event(Event::Text(text))?;
536        self.writer
537            .write_event(Event::End(self.start_tag.to_end()))?;
538        Ok(self.writer)
539    }
540
541    /// Write a CData event `<![CDATA[...]]>` inside the current element.
542    pub fn write_cdata_content(self, text: BytesCData) -> io::Result<&'a mut Writer<W>> {
543        self.writer
544            .write_event(Event::Start(self.start_tag.borrow()))?;
545        self.writer.write_event(Event::CData(text))?;
546        self.writer
547            .write_event(Event::End(self.start_tag.to_end()))?;
548        Ok(self.writer)
549    }
550
551    /// Write a processing instruction `<?...?>` inside the current element.
552    pub fn write_pi_content(self, pi: BytesPI) -> io::Result<&'a mut Writer<W>> {
553        self.writer
554            .write_event(Event::Start(self.start_tag.borrow()))?;
555        self.writer.write_event(Event::PI(pi))?;
556        self.writer
557            .write_event(Event::End(self.start_tag.to_end()))?;
558        Ok(self.writer)
559    }
560
561    /// Write an empty (self-closing) tag.
562    pub fn write_empty(self) -> io::Result<&'a mut Writer<W>> {
563        self.writer.write_event(Event::Empty(self.start_tag))?;
564        Ok(self.writer)
565    }
566
567    /// Create a new scope for writing XML inside the current element.
568    pub fn write_inner_content<F>(self, closure: F) -> io::Result<&'a mut Writer<W>>
569    where
570        F: FnOnce(&mut Writer<W>) -> io::Result<()>,
571    {
572        self.writer
573            .write_event(Event::Start(self.start_tag.borrow()))?;
574        closure(self.writer)?;
575        self.writer
576            .write_event(Event::End(self.start_tag.to_end()))?;
577        Ok(self.writer)
578    }
579}
/// Adapter that lets a [`std::io::Write`] implementor be used where a
/// [`std::fmt::Write`] one is required.
#[cfg(feature = "serialize")]
pub(crate) struct ToFmtWrite<T>(pub T);

#[cfg(feature = "serialize")]
impl<T: std::io::Write> std::fmt::Write for ToFmtWrite<T> {
    /// Writes the bytes of `s` to the wrapped writer.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        match self.0.write_all(s.as_bytes()) {
            Ok(()) => Ok(()),
            // `fmt::Error` cannot carry a payload, so the I/O error is dropped
            Err(_) => Err(std::fmt::Error),
        }
    }
}
592
/// Indentation settings and the current indentation level of a writer.
#[derive(Clone)]
pub(crate) struct Indentation {
    /// todo: this is an awkward fit as it has no impact on indentation logic, but it is
    /// only applicable when an indentation exists. Potentially refactor later
    should_line_break: bool,
    /// The byte used for indentation (e.g. ` ` or `\t`)
    indent_char: u8,
    /// How many instances of the indent byte are used for each level of indentation
    indent_size: usize,
    /// Cache of indent bytes; the indent of any level is a prefix of it
    indents: Vec<u8>,
    /// Length, in bytes, of the indent of the current level
    current_indent_len: usize,
}

impl Indentation {
    /// Creates indentation settings at level zero, with no line break pending.
    pub fn new(indent_char: u8, indent_size: usize) -> Self {
        Indentation {
            should_line_break: false,
            indent_char,
            indent_size,
            // Pre-filled cache; `ensure` extends it when deeper indents are needed
            indents: vec![indent_char; 128],
            // invariant: never exceeds `indents.len()`
            current_indent_len: 0,
        }
    }

    /// Increase indentation by one level
    pub fn grow(&mut self) {
        let deeper = self.current_indent_len + self.indent_size;
        self.ensure(deeper);
        self.current_indent_len = deeper;
    }

    /// Decrease indentation by one level. Does nothing if the level is already zero
    pub fn shrink(&mut self) {
        let shallower = self.current_indent_len.saturating_sub(self.indent_size);
        self.current_indent_len = shallower;
    }

    /// Returns the indent bytes of the current level
    pub fn current(&self) -> &[u8] {
        let (active, _spare) = self.indents.split_at(self.current_indent_len);
        active
    }

    /// Returns the indent of the current level extended by `additional_indent`
    /// bytes. The current level itself is not changed
    pub fn additional(&mut self, additional_indent: usize) -> &[u8] {
        let total = self.current_indent_len + additional_indent;
        self.ensure(total);
        &self.indents[..total]
    }

    /// Makes sure the cache holds at least `new_len` indent bytes
    fn ensure(&mut self, new_len: usize) {
        if new_len > self.indents.len() {
            self.indents.resize(new_len, self.indent_char);
        }
    }
}