quick_xml/writer.rs
1//! Contains high-level interface for an events-based XML emitter.
2
3use std::borrow::Cow;
4use std::io::{self, Write};
5
6use crate::encoding::UTF8_BOM;
7use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};
8
9#[cfg(feature = "async-tokio")]
10mod async_tokio;
11
12/// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] or [`tokio::io::AsyncWrite`] implementor.
13#[cfg(feature = "serialize")]
14use {crate::se::SeError, serde::Serialize};
15
/// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor.
///
/// A writer created with [`Writer::new`] emits events exactly as they are given.
/// A writer created with [`Writer::new_with_indent`] additionally inserts line
/// breaks and indentation between events.
///
/// # Examples
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::events::{Event, BytesEnd, BytesStart};
/// use quick_xml::reader::Reader;
/// use quick_xml::writer::Writer;
/// use std::io::Cursor;
///
/// let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
/// let mut reader = Reader::from_str(xml);
/// let mut writer = Writer::new(Cursor::new(Vec::new()));
/// loop {
///     match reader.read_event() {
///         Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
///
///             // creates a new element ... alternatively we could reuse `e` by calling
///             // `e.into_owned()`
///             let mut elem = BytesStart::new("my_elem");
///
///             // collect existing attributes
///             elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
///
///             // adds a new my-key="some value" attribute after the copied ones
///             elem.push_attribute(("my-key", "some value"));
///
///             // writes the event to the writer
///             assert!(writer.write_event(Event::Start(elem)).is_ok());
///         },
///         Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
///             assert!(writer.write_event(Event::End(BytesEnd::new("my_elem"))).is_ok());
///         },
///         Ok(Event::Eof) => break,
///         // we can either move or borrow the event to write, depending on your use-case
///         Ok(e) => assert!(writer.write_event(e.borrow()).is_ok()),
///         Err(e) => panic!("Error at position {}: {:?}", reader.error_position(), e),
///     }
/// }
///
/// let result = writer.into_inner().into_inner();
/// let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
/// assert_eq!(result, expected.as_bytes());
/// ```
#[derive(Clone)]
pub struct Writer<W> {
    /// Underlying writer that receives every byte emitted by this `Writer`
    writer: W,
    /// Indentation settings. `None` when the writer was created without
    /// indentation support, in which case no line breaks or indents are
    /// inserted automatically
    indent: Option<Indentation>,
}
67
68impl<W> Writer<W> {
69 /// Creates a `Writer` from a generic writer.
70 pub const fn new(inner: W) -> Writer<W> {
71 Writer {
72 writer: inner,
73 indent: None,
74 }
75 }
76
77 /// Creates a `Writer` with configured indents from a generic writer.
78 pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer<W> {
79 Writer {
80 writer: inner,
81 indent: Some(Indentation::new(indent_char, indent_size)),
82 }
83 }
84
85 /// Consumes this `Writer`, returning the underlying writer.
86 pub fn into_inner(self) -> W {
87 self.writer
88 }
89
90 /// Get a mutable reference to the underlying writer.
91 pub fn get_mut(&mut self) -> &mut W {
92 &mut self.writer
93 }
94
95 /// Get a reference to the underlying writer.
96 pub const fn get_ref(&self) -> &W {
97 &self.writer
98 }
99
100 /// Provides a simple, high-level API for writing XML elements.
101 ///
102 /// Returns an [`ElementWriter`] that simplifies setting attributes and writing
103 /// content inside the element.
104 ///
105 /// # Example
106 ///
107 /// ```
108 /// # use quick_xml::Result;
109 /// # fn main() -> Result<()> {
110 /// use quick_xml::events::{BytesStart, BytesText, Event};
111 /// use quick_xml::writer::Writer;
112 /// use quick_xml::Error;
113 /// use std::io::Cursor;
114 ///
115 /// let mut writer = Writer::new(Cursor::new(Vec::new()));
116 ///
117 /// // writes <tag attr1="value1"/>
118 /// writer.create_element("tag")
119 /// .with_attribute(("attr1", "value1")) // chain `with_attribute()` calls to add many attributes
120 /// .write_empty()?;
121 ///
122 /// // writes <tag attr1="value1" attr2="value2">with some text inside</tag>
123 /// writer.create_element("tag")
124 /// .with_attributes(vec![("attr1", "value1"), ("attr2", "value2")].into_iter()) // or add attributes from an iterator
125 /// .write_text_content(BytesText::new("with some text inside"))?;
126 ///
127 /// // writes <tag><fruit quantity="0">apple</fruit><fruit quantity="1">orange</fruit></tag>
128 /// writer.create_element("tag")
129 /// // We need to provide error type, because it is not named somewhere explicitly
130 /// .write_inner_content(|writer| {
131 /// let fruits = ["apple", "orange"];
132 /// for (quant, item) in fruits.iter().enumerate() {
133 /// writer
134 /// .create_element("fruit")
135 /// .with_attribute(("quantity", quant.to_string().as_str()))
136 /// .write_text_content(BytesText::new(item))?;
137 /// }
138 /// Ok(())
139 /// })?;
140 /// # Ok(())
141 /// # }
142 /// ```
143 #[must_use]
144 pub fn create_element<'a, N>(&'a mut self, name: N) -> ElementWriter<'a, W>
145 where
146 N: Into<Cow<'a, str>>,
147 {
148 ElementWriter {
149 writer: self,
150 start_tag: BytesStart::new(name),
151 state: AttributeIndent::NoneAttributesWritten,
152 spaces: Vec::new(),
153 }
154 }
155}
156
157impl<W: Write> Writer<W> {
158 /// Write a [Byte-Order-Mark] character to the document.
159 ///
160 /// # Example
161 ///
162 /// ```rust
163 /// # use quick_xml::Result;
164 /// # fn main() -> Result<()> {
165 /// use quick_xml::events::{BytesStart, BytesText, Event};
166 /// use quick_xml::writer::Writer;
167 /// use quick_xml::Error;
168 /// use std::io::Cursor;
169 ///
170 /// let mut buffer = Vec::new();
171 /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
172 ///
173 /// writer.write_bom()?;
174 /// writer
175 /// .create_element("empty")
176 /// .with_attribute(("attr1", "value1"))
177 /// .write_empty()
178 /// .expect("failure");
179 ///
180 /// assert_eq!(
181 /// std::str::from_utf8(&buffer).unwrap(),
182 /// "\u{FEFF}<empty attr1=\"value1\"/>"
183 /// );
184 /// # Ok(())
185 /// # }
186 /// ```
187 /// [Byte-Order-Mark]: https://unicode.org/faq/utf_bom.html#BOM
188 pub fn write_bom(&mut self) -> io::Result<()> {
189 self.write(UTF8_BOM)
190 }
191
192 /// Writes the given event to the underlying writer.
193 pub fn write_event<'a, E: Into<Event<'a>>>(&mut self, event: E) -> io::Result<()> {
194 let mut next_should_line_break = true;
195 let result = match event.into() {
196 Event::Start(e) => {
197 let result = self.write_wrapped(b"<", &e, b">");
198 if let Some(i) = self.indent.as_mut() {
199 i.grow();
200 }
201 result
202 }
203 Event::End(e) => {
204 if let Some(i) = self.indent.as_mut() {
205 i.shrink();
206 }
207 self.write_wrapped(b"</", &e, b">")
208 }
209 Event::Empty(e) => self.write_wrapped(b"<", &e, b"/>"),
210 Event::Text(e) => {
211 next_should_line_break = false;
212 self.write(&e)
213 }
214 Event::Comment(e) => self.write_wrapped(b"<!--", &e, b"-->"),
215 Event::CData(e) => {
216 next_should_line_break = false;
217 self.write(b"<![CDATA[")?;
218 self.write(&e)?;
219 self.write(b"]]>")
220 }
221 Event::Decl(e) => self.write_wrapped(b"<?", &e, b"?>"),
222 Event::PI(e) => self.write_wrapped(b"<?", &e, b"?>"),
223 Event::DocType(e) => self.write_wrapped(b"<!DOCTYPE ", &e, b">"),
224 Event::GeneralRef(e) => self.write_wrapped(b"&", &e, b";"),
225 Event::Eof => Ok(()),
226 };
227 if let Some(i) = self.indent.as_mut() {
228 i.should_line_break = next_should_line_break;
229 }
230 result
231 }
232
233 /// Writes bytes
234 #[inline]
235 pub(crate) fn write(&mut self, value: &[u8]) -> io::Result<()> {
236 self.writer.write_all(value).map_err(Into::into)
237 }
238
239 #[inline]
240 fn write_wrapped(&mut self, before: &[u8], value: &[u8], after: &[u8]) -> io::Result<()> {
241 if let Some(ref i) = self.indent {
242 if i.should_line_break {
243 self.writer.write_all(b"\n")?;
244 self.writer.write_all(i.current())?;
245 }
246 }
247 self.write(before)?;
248 self.write(value)?;
249 self.write(after)?;
250 Ok(())
251 }
252
253 /// Manually write a newline and indentation at the proper level.
254 ///
255 /// This can be used when the heuristic to line break and indent after any
256 /// [`Event`] apart from [`Text`] fails such as when a [`Start`] occurs directly
257 /// after [`Text`].
258 ///
259 /// This method will do nothing if `Writer` was not constructed with [`new_with_indent`].
260 ///
261 /// [`Text`]: Event::Text
262 /// [`Start`]: Event::Start
263 /// [`new_with_indent`]: Self::new_with_indent
264 pub fn write_indent(&mut self) -> io::Result<()> {
265 if let Some(ref i) = self.indent {
266 self.writer.write_all(b"\n")?;
267 self.writer.write_all(i.current())?;
268 }
269 Ok(())
270 }
271
272 /// Write an arbitrary serializable type
273 ///
274 /// Note: If you are attempting to write XML in a non-UTF-8 encoding, this may not
275 /// be safe to use. Rust basic types assume UTF-8 encodings.
276 ///
277 /// ```rust
278 /// # use pretty_assertions::assert_eq;
279 /// # use serde::Serialize;
280 /// # use quick_xml::events::{BytesStart, Event};
281 /// # use quick_xml::writer::Writer;
282 /// # use quick_xml::se::SeError;
283 /// # fn main() -> Result<(), SeError> {
284 /// #[derive(Debug, PartialEq, Serialize)]
285 /// struct MyData {
286 /// question: String,
287 /// answer: u32,
288 /// }
289 ///
290 /// let data = MyData {
291 /// question: "The Ultimate Question of Life, the Universe, and Everything".into(),
292 /// answer: 42,
293 /// };
294 ///
295 /// let mut buffer = Vec::new();
296 /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
297 ///
298 /// let start = BytesStart::new("root");
299 /// let end = start.to_end();
300 ///
301 /// writer.write_event(Event::Start(start.clone()))?;
302 /// writer.write_serializable("my_data", &data)?;
303 /// writer.write_event(Event::End(end))?;
304 ///
305 /// assert_eq!(
306 /// std::str::from_utf8(&buffer)?,
307 /// r#"<root>
308 /// <my_data>
309 /// <question>The Ultimate Question of Life, the Universe, and Everything</question>
310 /// <answer>42</answer>
311 /// </my_data>
312 /// </root>"#
313 /// );
314 /// # Ok(())
315 /// # }
316 /// ```
317 #[cfg(feature = "serialize")]
318 pub fn write_serializable<T: Serialize>(
319 &mut self,
320 tag_name: &str,
321 content: &T,
322 ) -> Result<(), SeError> {
323 use crate::se::{Indent, Serializer};
324
325 self.write_indent()?;
326 let mut fmt = ToFmtWrite(&mut self.writer);
327 let mut serializer = Serializer::with_root(&mut fmt, Some(tag_name))?;
328
329 if let Some(indent) = &mut self.indent {
330 serializer.set_indent(Indent::Borrow(indent));
331 }
332
333 content.serialize(serializer)?;
334
335 Ok(())
336 }
337}
338
339/// Track indent inside elements state
340///
341/// ```mermaid
342/// stateDiagram-v2
343/// [*] --> NoneAttributesWritten
344/// NoneAttributesWritten --> Spaces : .with_attribute()
345/// NoneAttributesWritten --> WriteConfigured : .new_line()
346///
347/// Spaces --> Spaces : .with_attribute()
348/// Spaces --> WriteSpaces : .new_line()
349///
350/// WriteSpaces --> Spaces : .with_attribute()
351/// WriteSpaces --> WriteSpaces : .new_line()
352///
353/// Configured --> Configured : .with_attribute()
354/// Configured --> WriteConfigured : .new_line()
355///
356/// WriteConfigured --> Configured : .with_attribute()
357/// WriteConfigured --> WriteConfigured : .new_line()
358/// ```
#[derive(Debug)]
enum AttributeIndent {
    /// Initial state. `ElementWriter` was just created and no attributes written yet
    NoneAttributesWritten,
    /// `new_line()` was requested after at least one attribute was written.
    /// The next `with_attribute()` writes the specified count of spaces
    /// before the attribute
    WriteSpaces(usize),
    /// Keep space indent that should be used if `new_line()` would be called.
    /// The count is the length of the tag name plus 2 (for `<` and one space),
    /// so that attributes line up with the first attribute
    Spaces(usize),
    /// `new_line()` was requested either before any attribute was written or
    /// while a configured indent is in use. The next `with_attribute()` writes
    /// the current element indent plus the specified count of configured
    /// indent characters before the attribute
    WriteConfigured(usize),
    /// Keep indent that should be used if `new_line()` would be called
    Configured(usize),
}
372
/// A struct to write an element. Contains methods to add attributes and inner
/// elements to the element
pub struct ElementWriter<'a, W> {
    /// Writer that the finished element is written to
    writer: &'a mut Writer<W>,
    /// Start tag under construction. Collects the element name, the attributes
    /// and the new lines / indents requested between attributes
    start_tag: BytesStart<'a>,
    /// Tracks how the next attribute has to be indented
    state: AttributeIndent,
    /// Contains spaces used to write space indents of attributes
    spaces: Vec<u8>,
}
382
383impl<'a, W> ElementWriter<'a, W> {
384 /// Adds an attribute to this element.
385 pub fn with_attribute<'b, I>(mut self, attr: I) -> Self
386 where
387 I: Into<Attribute<'b>>,
388 {
389 self.write_attr(attr.into());
390 self
391 }
392
393 /// Add additional attributes to this element using an iterator.
394 ///
395 /// The yielded items must be convertible to [`Attribute`] using `Into`.
396 pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
397 where
398 I: IntoIterator,
399 I::Item: Into<Attribute<'b>>,
400 {
401 let mut iter = attributes.into_iter();
402 if let Some(attr) = iter.next() {
403 self.write_attr(attr.into());
404 self.start_tag.extend_attributes(iter);
405 }
406 self
407 }
408
409 /// Push a new line inside an element between attributes. Note, that this
410 /// method does nothing if [`Writer`] was created without indentation support.
411 ///
412 /// # Examples
413 ///
414 /// The following code
415 ///
416 /// ```
417 /// # use quick_xml::writer::Writer;
418 /// let mut buffer = Vec::new();
419 /// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 2);
420 /// writer
421 /// .create_element("element")
422 /// //.new_line() (1)
423 /// .with_attribute(("first", "1"))
424 /// .with_attribute(("second", "2"))
425 /// .new_line()
426 /// .with_attributes([
427 /// ("third", "3"),
428 /// ("fourth", "4"),
429 /// ])
430 /// //.new_line() (2)
431 /// .write_empty();
432 /// ```
433 /// will produce the following XMLs:
434 /// ```xml
435 /// <!-- result of the code above. Spaces always is used -->
436 /// <element first="1" second="2"
437 /// third="3" fourth="4"/>
438 ///
439 /// <!-- if uncomment only (1) - indent depends on indentation
440 /// settings - 2 spaces here -->
441 /// <element
442 /// first="1" second="2"
443 /// third="3" fourth="4"/>
444 ///
445 /// <!-- if uncomment only (2). Spaces always is used -->
446 /// <element first="1" second="2"
447 /// third="3" fourth="4"
448 /// />
449 /// ```
450 pub fn new_line(mut self) -> Self {
451 if let Some(i) = self.writer.indent.as_mut() {
452 match self.state {
453 // .new_line() called just after .create_element().
454 // Use element indent to additionally indent attributes
455 AttributeIndent::NoneAttributesWritten => {
456 self.state = AttributeIndent::WriteConfigured(i.indent_size)
457 }
458
459 AttributeIndent::WriteSpaces(_) => {}
460 // .new_line() called when .with_attribute() was called at least once.
461 // The spaces should be used to indent
462 // Plan saved indent
463 AttributeIndent::Spaces(indent) => {
464 self.state = AttributeIndent::WriteSpaces(indent)
465 }
466
467 AttributeIndent::WriteConfigured(_) => {}
468 // .new_line() called when .with_attribute() was called at least once.
469 // The configured indent characters should be used to indent
470 // Plan saved indent
471 AttributeIndent::Configured(indent) => {
472 self.state = AttributeIndent::WriteConfigured(indent)
473 }
474 }
475 self.start_tag.push_newline();
476 };
477 self
478 }
479
480 /// Writes attribute and maintain indentation state
481 fn write_attr<'b>(&mut self, attr: Attribute<'b>) {
482 if let Some(i) = self.writer.indent.as_mut() {
483 // Save the indent that we should use next time when .new_line() be called
484 self.state = match self.state {
485 // Neither .new_line() or .with_attribute() yet called
486 // If newline inside attributes will be requested, we should indent them
487 // by the length of tag name and +1 for `<` and +1 for one space
488 AttributeIndent::NoneAttributesWritten => {
489 self.start_tag.push_attribute(attr);
490 AttributeIndent::Spaces(self.start_tag.name().as_ref().len() + 2)
491 }
492
493 // Indent was requested by previous call to .new_line(), write it
494 // New line was already written
495 AttributeIndent::WriteSpaces(indent) => {
496 if self.spaces.len() < indent {
497 self.spaces.resize(indent, b' ');
498 }
499 self.start_tag.push_indent(&self.spaces[..indent]);
500 self.start_tag.push_attr(attr.into());
501 AttributeIndent::Spaces(indent)
502 }
503 // .new_line() was not called, but .with_attribute() was.
504 // use the previously calculated indent
505 AttributeIndent::Spaces(indent) => {
506 self.start_tag.push_attribute(attr);
507 AttributeIndent::Spaces(indent)
508 }
509
510 // Indent was requested by previous call to .new_line(), write it
511 // New line was already written
512 AttributeIndent::WriteConfigured(indent) => {
513 self.start_tag.push_indent(i.additional(indent));
514 self.start_tag.push_attr(attr.into());
515 AttributeIndent::Configured(indent)
516 }
517 // .new_line() was not called, but .with_attribute() was.
518 // use the previously calculated indent
519 AttributeIndent::Configured(indent) => {
520 self.start_tag.push_attribute(attr);
521 AttributeIndent::Configured(indent)
522 }
523 };
524 } else {
525 self.start_tag.push_attribute(attr);
526 }
527 }
528}
529
530impl<'a, W: Write> ElementWriter<'a, W> {
531 /// Write some text inside the current element.
532 pub fn write_text_content(self, text: BytesText) -> io::Result<&'a mut Writer<W>> {
533 self.writer
534 .write_event(Event::Start(self.start_tag.borrow()))?;
535 self.writer.write_event(Event::Text(text))?;
536 self.writer
537 .write_event(Event::End(self.start_tag.to_end()))?;
538 Ok(self.writer)
539 }
540
541 /// Write a CData event `<![CDATA[...]]>` inside the current element.
542 pub fn write_cdata_content(self, text: BytesCData) -> io::Result<&'a mut Writer<W>> {
543 self.writer
544 .write_event(Event::Start(self.start_tag.borrow()))?;
545 self.writer.write_event(Event::CData(text))?;
546 self.writer
547 .write_event(Event::End(self.start_tag.to_end()))?;
548 Ok(self.writer)
549 }
550
551 /// Write a processing instruction `<?...?>` inside the current element.
552 pub fn write_pi_content(self, pi: BytesPI) -> io::Result<&'a mut Writer<W>> {
553 self.writer
554 .write_event(Event::Start(self.start_tag.borrow()))?;
555 self.writer.write_event(Event::PI(pi))?;
556 self.writer
557 .write_event(Event::End(self.start_tag.to_end()))?;
558 Ok(self.writer)
559 }
560
561 /// Write an empty (self-closing) tag.
562 pub fn write_empty(self) -> io::Result<&'a mut Writer<W>> {
563 self.writer.write_event(Event::Empty(self.start_tag))?;
564 Ok(self.writer)
565 }
566
567 /// Create a new scope for writing XML inside the current element.
568 pub fn write_inner_content<F>(self, closure: F) -> io::Result<&'a mut Writer<W>>
569 where
570 F: FnOnce(&mut Writer<W>) -> io::Result<()>,
571 {
572 self.writer
573 .write_event(Event::Start(self.start_tag.borrow()))?;
574 closure(self.writer)?;
575 self.writer
576 .write_event(Event::End(self.start_tag.to_end()))?;
577 Ok(self.writer)
578 }
579}
/// Adapter that lets a [`std::io::Write`] implementor be used where a
/// [`std::fmt::Write`] one is expected.
#[cfg(feature = "serialize")]
pub(crate) struct ToFmtWrite<T>(pub T);

#[cfg(feature = "serialize")]
impl<T: std::io::Write> std::fmt::Write for ToFmtWrite<T> {
    /// Forwards the UTF-8 bytes of `s` to the wrapped writer.
    ///
    /// `std::fmt::Error` carries no payload, so the details of an I/O failure
    /// are discarded.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        match self.0.write_all(s.as_bytes()) {
            Ok(()) => Ok(()),
            Err(_) => Err(std::fmt::Error),
        }
    }
}
592
/// Indentation settings and state of a writer.
#[derive(Clone)]
pub(crate) struct Indentation {
    /// Whether a line break has to be written before the next event.
    ///
    /// todo: this is an awkward fit as it has no impact on indentation logic, but it is
    /// only applicable when an indentation exists. Potentially refactor later
    should_line_break: bool,
    /// The character code to be used for indentations (e.g. ` ` or `\t`)
    indent_char: u8,
    /// How many instances of the indent character ought to be used for each level of indentation
    indent_size: usize,
    /// Cache of indent characters; indents are handed out as prefixes of it
    indents: Vec<u8>,
    /// The current amount of indentation, in bytes.
    /// Invariant: must never exceed `indents.len()`
    current_indent_len: usize,
}

impl Indentation {
    /// Creates the settings for indenting each level by `indent_size`
    /// repetitions of `indent_char`. The indentation starts at level zero and
    /// no line break is pending.
    pub fn new(indent_char: u8, indent_size: usize) -> Self {
        // Pre-fill the cache so that typical documents never have to grow it
        let indents = vec![indent_char; 128];
        Self {
            should_line_break: false,
            indent_char,
            indent_size,
            indents,
            current_indent_len: 0,
        }
    }

    /// Increase indentation by one level
    pub fn grow(&mut self) {
        let grown = self.current_indent_len + self.indent_size;
        self.current_indent_len = grown;
        // Keep the cache long enough for `current()` to slice it
        self.ensure(grown);
    }

    /// Decrease indentation by one level. Do nothing, if level already zero
    pub fn shrink(&mut self) {
        self.current_indent_len = self
            .current_indent_len
            .checked_sub(self.indent_size)
            .unwrap_or(0);
    }

    /// Returns indent string for current level
    pub fn current(&self) -> &[u8] {
        let len = self.current_indent_len;
        &self.indents[..len]
    }

    /// Returns indent with current indent plus additional indent.
    ///
    /// The current level itself is left unchanged.
    pub fn additional(&mut self, additional_indent: usize) -> &[u8] {
        let total = self.current_indent_len + additional_indent;
        self.ensure(total);
        &self.indents[..total]
    }

    /// Makes sure that the cache holds at least `new_len` indent characters
    fn ensure(&mut self, new_len: usize) {
        if new_len <= self.indents.len() {
            return;
        }
        self.indents.resize(new_len, self.indent_char);
    }
}