1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Really Bad Markup Language (rbml) is an internal serialization format of rustc.
12 //! This is not intended to be used by users.
14 //! Originally based on the Extensible Binary Markup Language
15 //! (ebml; http://www.matroska.org/technical/specs/rfc/index.html),
16 //! it is now a separate format tuned for the rust object metadata.
20 //! An RBML document consists of a tag, a length and data.
21 //! The encoded data can contain multiple RBML documents concatenated.
23 //! **Tags** are a hint for the following data.
24 //! A tag is a number from 0x000 to 0xfff, where 0xf0 through 0xff are reserved.
25 //! Tags less than 0xf0 are encoded in one literal byte.
26 //! Tags greater than 0xff are encoded in two big-endian bytes,
27 //! where the tag number is ORed with 0xf000. (E.g. tag 0x123 = `f1 23`)
29 //! **Lengths** encode the length of the following data.
30 //! It is a variable-length unsigned integer, and one of the following forms:
32 //! - `80` through `fe` for lengths up to 0x7e;
33 //! - `40 ff` through `7f ff` for lengths up to 0x3fff;
34 //! - `20 40 00` through `3f ff ff` for lengths up to 0x1fffff;
35 //! - `10 20 00 00` through `1f ff ff ff` for lengths up to 0xfffffff.
37 //! The "overlong" form is allowed so that the length can be encoded
38 //! without the prior knowledge of the encoded data.
39 //! For example, the length 0 can be represented either by `80`, `40 00`,
40 //! `20 00 00` or `10 00 00 00`.
41 //! The encoder tries to minimize the length if possible.
42 //! Also, some predefined tags listed below are so commonly used that
43 //! their lengths are omitted ("implicit length").
45 //! **Data** can be either binary bytes or zero or more nested RBML documents.
46 //! Nested documents cannot overflow, and should be entirely contained
47 //! within a parent document.
51 //! Most RBML tags are defined by the application.
52 //! (For the rust object metadata, see also `rustc::metadata::common`.)
53 //! RBML itself does define a set of predefined tags however,
54 //! intended for the auto-serialization implementation.
56 //! Predefined tags with an implicit length:
58 //! - `U8` (`00`): 1-byte unsigned integer.
59 //! - `U16` (`01`): 2-byte big endian unsigned integer.
60 //! - `U32` (`02`): 4-byte big endian unsigned integer.
61 //! - `U64` (`03`): 8-byte big endian unsigned integer.
62 //! Any of the `U*` tags can be used to encode a primitive unsigned integer type,
63 //! as long as the tag's width is no greater than the width of that type.
64 //! For example, `u8` can only be represented via the `U8` tag.
66 //! - `I8` (`04`): 1-byte signed integer.
67 //! - `I16` (`05`): 2-byte big endian signed integer.
68 //! - `I32` (`06`): 4-byte big endian signed integer.
69 //! - `I64` (`07`): 8-byte big endian signed integer.
70 //! Similar to `U*` tags. Always uses two's complement encoding.
72 //! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true.
74 //! - `Char` (`09`): 4-byte big endian Unicode scalar value.
75 //! Surrogate code points or out-of-range values are invalid.
77 //! - `F32` (`0a`): 4-byte big endian unsigned integer representing
78 //! IEEE 754 binary32 floating-point format.
79 //! - `F64` (`0b`): 8-byte big endian unsigned integer representing
80 //! IEEE 754 binary64 floating-point format.
82 //! - `Sub8` (`0c`): 1-byte unsigned integer for supplementary information.
83 //! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information.
84 //! Those two tags normally occur as the first subdocument of certain tags,
85 //! namely `Enum`, `Vec` and `Map`, to provide a variant or size information.
86 //! They can be used interchangeably.
88 //! Predefined tags with an explicit length:
90 //! - `Str` (`10`): A UTF-8-encoded string.
92 //! - `Enum` (`11`): An enum.
93 //! The first subdocument should be `Sub*` tags with a variant ID.
94 //! Subsequent subdocuments, if any, encode variant arguments.
96 //! - `Vec` (`12`): A vector (sequence).
97 //! - `VecElt` (`13`): A vector element.
98 //! The first subdocument should be `Sub*` tags with the number of elements.
99 //! Subsequent subdocuments should be `VecElt` tag per each element.
101 //! - `Map` (`14`): A map (associative array).
102 //! - `MapKey` (`15`): A key part of the map entry.
103 //! - `MapVal` (`16`): A value part of the map entry.
104 //! The first subdocument should be `Sub*` tags with the number of entries.
105 //! Subsequent subdocuments should be an alternating sequence of
106 //! `MapKey` and `MapVal` tags per each entry.
108 //! - `Opaque` (`17`): An opaque, custom-format tag.
109 //! Used to wrap ordinary custom tags or data in the auto-serialized context.
110 //! Rustc typically uses this to encode type information.
112 //! The first 0x20 tags are reserved by RBML; custom tags start at 0x20.
114 #![crate_name = "rbml"]
115 #![unstable(feature = "rustc_private", issue = "27812")]
116 #![crate_type = "rlib"]
117 #![crate_type = "dylib"]
118 #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
119 html_favicon_url = "https://doc.rust-lang.org/favicon.ico",
120 html_root_url = "https://doc.rust-lang.org/nightly/",
121 html_playground_url = "https://play.rust-lang.org/",
122 test(attr(deny(warnings))))]
123 #![cfg_attr(not(stage0), deny(warnings))]
125 #![feature(rustc_private)]
126 #![feature(staged_api)]
128 #![cfg_attr(test, feature(test))]
130 extern crate serialize;
133 extern crate serialize as rustc_serialize; // Used by RustcEncodable
144 pub use self::EbmlEncoderTag::*;
145 pub use self::Error::*;
150 /// Common data structures
151 #[derive(Clone, Copy)]
158 impl<'doc> Doc<'doc> {
159 pub fn new(data: &'doc [u8]) -> Doc<'doc> {
167 pub fn get<'a>(&'a self, tag: usize) -> Doc<'a> {
168 reader::get_doc(*self, tag)
171 pub fn is_empty(&self) -> bool {
172 self.start == self.end
175 pub fn as_str_slice<'a>(&'a self) -> &'a str {
176 str::from_utf8(&self.data[self.start..self.end]).unwrap()
179 pub fn as_str(&self) -> String {
180 self.as_str_slice().to_string()
184 pub struct TaggedDoc<'a> {
189 #[derive(Copy, Clone, Debug)]
190 pub enum EbmlEncoderTag {
191 // tags 00..1f are reserved for auto-serialization.
192 // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded.
193 EsU8 = 0x00, // + 1 byte
194 EsU16 = 0x01, // + 2 bytes
195 EsU32 = 0x02, // + 4 bytes
196 EsU64 = 0x03, // + 8 bytes
197 EsI8 = 0x04, // + 1 byte
198 EsI16 = 0x05, // + 2 bytes
199 EsI32 = 0x06, // + 4 bytes
200 EsI64 = 0x07, // + 8 bytes
201 EsBool = 0x08, // + 1 byte
202 EsChar = 0x09, // + 4 bytes
203 EsF32 = 0x0a, // + 4 bytes
204 EsF64 = 0x0b, // + 8 bytes
205 EsSub8 = 0x0c, // + 1 byte
206 EsSub32 = 0x0d, // + 4 bytes
207 // 0x0e and 0x0f are reserved
209 EsEnum = 0x11, // encodes the variant id as the first EsSub*
210 EsVec = 0x12, // encodes the # of elements as the first EsSub*
212 EsMap = 0x14, // encodes the # of pairs as the first EsSub*
218 const NUM_TAGS: usize = 0x1000;
219 const NUM_IMPLICIT_TAGS: usize = 0x0e;
221 #[cfg_attr(rustfmt, rustfmt_skip)]
222 static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [
236 IoError(std::io::Error),
237 ApplicationError(String),
240 impl fmt::Display for Error {
241 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
242 // FIXME: this should be a more useful display form
243 fmt::Debug::fmt(self, f)
246 // --------------------------------------
252 use std::mem::transmute;
257 use super::{ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey,
258 EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32,
259 EsChar, EsStr, EsMapVal, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error,
260 IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN};
262 pub type DecodeResult<T> = Result<T, Error>;
265 macro_rules! try_or {
266 ($e:expr, $r:expr) => (
270 debug!("ignored error: {:?}", e);
277 #[derive(Copy, Clone)]
/// Decodes the tag that begins at `data[start]`.
///
/// The first byte is always read. In the two-byte form the low nibble of that
/// byte supplies bits 8..11 of the tag and the following byte supplies bits 0..7.
///
/// NOTE(review): both reads are plain slice indexing, so an out-of-range
/// `start` panics instead of producing an `Err` -- confirm callers guarantee
/// the bounds.
283 pub fn tag_at(data: &[u8], start: usize) -> DecodeResult<Res> {
284 let v = data[start] as usize;
// two-byte form: strip the marker nibble, then append the second byte
292 val: ((v & 0xf) << 8) | data[start + 1] as usize,
296 // every tag starting with byte 0xf0 is an overlong form, which is prohibited.
/// Byte-by-byte decoder for a variable-length unsigned integer at `data[start]`.
///
/// `vuint_at` falls back to this when fewer than four bytes remain in `data`.
/// The visible arms decode the one-, two-, three- and four-byte forms, which
/// carry 7, 14, 21 and 28 payload bits respectively. If no form matches, the
/// leading byte is reported through `IntTooBig`.
///
/// NOTE(review): `a` is presumably the byte at `data[start]`; its binding is
/// not visible in this excerpt.
302 fn vuint_at_slow(data: &[u8], start: usize) -> DecodeResult<Res> {
// 1-byte form: 7 payload bits
306 val: (a & 0x7f) as usize,
// 2-byte form: 6 bits from the first byte, 8 from the second
312 val: ((a & 0x3f) as usize) << 8 | (data[start + 1] as usize),
// 3-byte form: 5 + 8 + 8 bits
318 val: ((a & 0x1f) as usize) << 16 | (data[start + 1] as usize) << 8 |
319 (data[start + 2] as usize),
// 4-byte form: 4 + 8 + 8 + 8 bits
325 val: ((a & 0x0f) as usize) << 24 | (data[start + 1] as usize) << 16 |
326 (data[start + 2] as usize) << 8 |
327 (data[start + 3] as usize),
// no recognised length marker in the leading byte
331 Err(IntTooBig(a as usize))
/// Decodes the variable-length unsigned integer that begins at `data[start]`.
///
/// When fewer than four bytes remain, the work is delegated to
/// `vuint_at_slow`. Otherwise four bytes are loaded as one big-endian `u32`
/// and the top nibble selects a `(shift, mask)` pair from `SHIFT_MASK_TABLE`.
/// The returned `next` is `start` plus the number of bytes the encoding
/// occupied.
///
/// NOTE(review): `data.len() - start` is an unsigned subtraction and
/// underflows if `start > data.len()` -- confirm callers never pass that.
334 pub fn vuint_at(data: &[u8], start: usize) -> DecodeResult<Res> {
335 if data.len() - start < 4 {
336 return vuint_at_slow(data, start);
339 // Lookup table for parsing EBML Element IDs as per
340 // http://ebml.sourceforge.net/specs/ The Element IDs are parsed by
341 // reading a big endian u32 positioned at data[start]. Using the four
342 // most significant bits of the u32 we lookup in the table below how
343 // the element ID should be derived from it.
345 // The table stores tuples (shift, mask) where shift is the number the
346 // u32 should be right shifted with and mask is the value the right
347 // shifted value should be masked with. If for example the most
348 // significant bit is set this means it's a class A ID and the u32
349 // should be right shifted with 24 and masked with 0x7f. Therefore we
350 // store (24, 0x7f) at index 0x8 - 0xF (four bit numbers where the most
351 // significant bit is set).
353 // By storing the number of shifts and masks in a table instead of
354 // checking in order if the most significant bit is set, the second
355 // most significant bit is set etc. we can replace up to three
356 // "and+branch" with a single table lookup which gives us a measured
357 // speedup of around 2x on x86_64.
358 static SHIFT_MASK_TABLE: [(usize, u32); 16] = [(0, 0x0),
// NOTE(review): the pointer comes from a byte slice and is dereferenced as
// `*const u32` with no visible alignment handling -- confirm this read is
// acceptable on every supported target.
376 let ptr = data.as_ptr().offset(start as isize) as *const u32;
377 let val = u32::from_be(*ptr);
// the top four bits pick the table entry
379 let i = (val >> 28) as usize;
380 let (shift, mask) = SHIFT_MASK_TABLE[i];
382 val: ((val >> shift) & mask) as usize,
// (32 - shift) / 8 is the encoded width in bytes
383 next: start + ((32 - shift) >> 3),
/// Determines the length of the data that follows an already-decoded `tag`.
///
/// For tags below `NUM_IMPLICIT_TAGS` whose `TAG_IMPLICIT_LEN` entry is
/// non-negative, that entry is the length and no length bytes are read.
/// Every other tag has its length decoded from `data` at `tag.next`.
388 pub fn tag_len_at(data: &[u8], tag: Res) -> DecodeResult<Res> {
389 if tag.val < NUM_IMPLICIT_TAGS && TAG_IMPLICIT_LEN[tag.val] >= 0 {
// implicit length: taken straight from the table
391 val: TAG_IMPLICIT_LEN[tag.val] as usize,
// explicit length: encoded right after the tag
395 vuint_at(data, tag.next)
/// Decodes the tag and length of the document that begins at `data[start]`.
///
/// Errors from `tag_at` or `tag_len_at` are propagated via `try!`. The
/// document's payload begins at `elt_size.next`, and `end` is computed as
/// that position plus the decoded length.
///
/// NOTE(review): nothing visible here checks `end` against `data.len()`;
/// the decoder methods compare the child's end with the parent's end after
/// calling this.
399 pub fn doc_at<'a>(data: &'a [u8], start: usize) -> DecodeResult<TaggedDoc<'a>> {
400 let elt_tag = try!(tag_at(data, start));
401 let elt_size = try!(tag_len_at(data, elt_tag));
// one past the last payload byte
402 let end = elt_size.next + elt_size.val;
407 start: elt_size.next,
/// Searches the children of `d`, beginning at `d.start`, for one tagged `tg`.
///
/// A tag or length that fails to decode is handled by `try_or!(..., None)`;
/// presumably that makes the function return `None` -- confirm against the
/// macro body.
///
/// NOTE(review): the loop header is not visible in this excerpt; `pos` is
/// advanced past each child before its tag is compared with `tg`.
413 pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: usize) -> Option<Doc<'a>> {
414 let mut pos = d.start;
416 let elt_tag = try_or!(tag_at(d.data, pos), None);
417 let elt_size = try_or!(tag_len_at(d.data, elt_tag), None);
// step over this child's payload so the next read lands on its sibling
418 pos = elt_size.next + elt_size.val;
419 if elt_tag.val == tg {
422 start: elt_size.next,
430 pub fn get_doc<'a>(d: Doc<'a>, tg: usize) -> Doc<'a> {
431 match maybe_get_doc(d, tg) {
434 error!("failed to find block with tag {:?}", tg);
440 pub fn docs<'a>(d: Doc<'a>) -> DocsIterator<'a> {
441 DocsIterator { d: d }
444 pub struct DocsIterator<'a> {
// Iterates over the child documents of `d`, yielding `(tag, doc)` pairs.
448 impl<'a> Iterator for DocsIterator<'a> {
449 type Item = (usize, Doc<'a>);
/// Decodes the child at `self.d.start` and yields its tag and document.
///
/// If the tag or the length cannot be decoded, `self.d.start` is set to
/// `self.d.end`, so the `start >= end` check at the top is true on every
/// later call.
451 fn next(&mut self) -> Option<(usize, Doc<'a>)> {
// nothing left between start and end
452 if self.d.start >= self.d.end {
456 let elt_tag = try_or!(tag_at(self.d.data, self.d.start), {
// undecodable tag: mark the iterator as exhausted
457 self.d.start = self.d.end;
460 let elt_size = try_or!(tag_len_at(self.d.data, elt_tag), {
// undecodable length: mark the iterator as exhausted
461 self.d.start = self.d.end;
465 let end = elt_size.next + elt_size.val;
468 start: elt_size.next,
473 return Some((elt_tag.val, doc));
477 pub fn tagged_docs<'a>(d: Doc<'a>, tag: usize) -> TaggedDocsIterator<'a> {
484 pub struct TaggedDocsIterator<'a> {
485 iter: DocsIterator<'a>,
489 impl<'a> Iterator for TaggedDocsIterator<'a> {
492 fn next(&mut self) -> Option<Doc<'a>> {
493 while let Some((tag, doc)) = self.iter.next() {
502 pub fn with_doc_data<T, F>(d: Doc, f: F) -> T
503 where F: FnOnce(&[u8]) -> T
505 f(&d.data[d.start..d.end])
508 pub fn doc_as_u8(d: Doc) -> u8 {
509 assert_eq!(d.end, d.start + 1);
/// Interprets the bytes of `d` (`d.start..d.end`) as a big-endian unsigned
/// integer.
///
/// Two strategies are visible. One copies the eight bytes ending at `d.end`,
/// converts them from big-endian, and masks the result down to `len * 8`
/// bits. The other folds the document's bytes one at a time, most significant
/// first.
///
/// NOTE(review): the conditions that choose between the two paths, and any
/// guard around the mask for `len == 8`, are not visible in this excerpt.
513 pub fn doc_as_u64(d: Doc) -> u64 {
515 // For performance, we read 8 big-endian bytes,
516 // and mask off the junk if there is any. This
517 // obviously won't work on the first 8 bytes
518 // of a file - we will fall off the start
519 // of the page and segfault.
522 b.clone_from_slice(&d.data[d.end - 8..d.end]);
// reinterpret the 8 copied bytes as a u64 and byte-swap on little-endian hosts
523 let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() };
524 let len = d.end - d.start;
// keep only the low `len` bytes; the rest belongs to whatever precedes the doc
526 data & ((1 << (len * 8)) - 1)
// byte-at-a-time path: accumulate most-significant byte first
532 for b in &d.data[d.start..d.end] {
533 result = (result << 8) + (*b as u64);
540 pub fn doc_as_u16(d: Doc) -> u16 {
544 pub fn doc_as_u32(d: Doc) -> u32 {
549 pub fn doc_as_i8(d: Doc) -> i8 {
553 pub fn doc_as_i16(d: Doc) -> i16 {
557 pub fn doc_as_i32(d: Doc) -> i32 {
561 pub fn doc_as_i64(d: Doc) -> i64 {
565 pub struct Decoder<'a> {
570 impl<'doc> Decoder<'doc> {
571 pub fn new(d: Doc<'doc>) -> Decoder<'doc> {
/// Reads the document at the cursor (`self.pos`) and checks that it carries
/// the tag `exp_tag`.
///
/// Returns `Expected(..)` when the cursor is already at or past the end of
/// the parent, when the tag found differs from `exp_tag`, or when the child
/// extends beyond the parent's end. Decoding errors from `doc_at` are
/// propagated. The cursor is moved to the end of the child document.
578 fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<Doc<'doc>> {
579 debug!(". next_doc(exp_tag={:?})", exp_tag);
// nothing left to read inside the current parent
580 if self.pos >= self.parent.end {
581 return Err(Expected(format!("no more documents in current node!")));
583 let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
584 debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}",
591 if r_tag != (exp_tag as usize) {
592 return Err(Expected(format!("expected EBML doc with tag {:?} but found tag {:?}",
// a child may not extend past the document that contains it
596 if r_doc.end > self.parent.end {
597 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
// continue after this child on the next read
602 self.pos = r_doc.end;
/// Reads the next document (which must carry `exp_tag`) and runs `f` on this
/// decoder, restoring the saved parent afterwards.
///
/// NOTE(review): the lines that install `d` as the new parent and that
/// restore `old_pos` are not visible in this excerpt. Because `f`'s result
/// goes through `try!`, an `Err` from `f` returns before
/// `self.parent = old_parent` runs -- confirm callers do not reuse the
/// decoder after an error.
606 fn push_doc<T, F>(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult<T>
607 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
609 let d = try!(self.next_doc(exp_tag));
// remember where we were so the outer document can be resumed
610 let old_parent = self.parent;
611 let old_pos = self.pos;
614 let r = try!(f(self));
615 self.parent = old_parent;
/// Reads the supplementary value (`EsSub8` or `EsSub32`) at the cursor and
/// returns it as a `usize`.
///
/// Any other tag produces `Expected(..)`, as does a child that extends past
/// the parent's end. The cursor is moved to the end of the sub-document.
///
/// NOTE(review): the value returned when the parent is empty is not visible
/// in this excerpt.
620 fn _next_sub(&mut self) -> DecodeResult<usize> {
621 // empty vector/map optimization
622 if self.parent.is_empty() {
626 let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
// either width is accepted; the tag says which one was written
627 let r = if r_tag == (EsSub8 as usize) {
628 doc_as_u8(r_doc) as usize
629 } else if r_tag == (EsSub32 as usize) {
630 doc_as_u32(r_doc) as usize
632 return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but found \
638 if r_doc.end > self.parent.end {
639 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
644 self.pos = r_doc.end;
645 debug!("_next_sub result={:?}", r);
649 // variable-length unsigned integer with different tags.
650 // `first_tag` should be a tag for u8 or i8.
651 // `last_tag` should be the largest allowed integer tag with the matching signedness.
652 // all tags between them should be valid, in the order of u8, u16, u32 and u64.
/// Reads the integer document at the cursor, accepting any tag in the
/// inclusive range `first_tag..=last_tag`, and returns its payload widened to
/// `u64`.
///
/// The offset of the tag from `first_tag` selects the payload width (1, 2, 4
/// or 8 bytes). Returns `Expected(..)` when the parent is exhausted, when the
/// tag is outside the range, or when the child extends past the parent.
///
/// NOTE(review): every arm widens with an unsigned `as u64` cast, i.e. it
/// zero-extends. When this is called with the signed tags, a negative value
/// stored under a narrower tag (e.g. `-1` under `EsI8`) comes back as a
/// positive number, because the callers only cast the result -- confirm
/// whether sign extension is intended here.
653 fn _next_int(&mut self,
654 first_tag: EbmlEncoderTag,
655 last_tag: EbmlEncoderTag)
656 -> DecodeResult<u64> {
657 if self.pos >= self.parent.end {
658 return Err(Expected(format!("no more documents in current node!")));
661 let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
662 let r = if first_tag as usize <= r_tag && r_tag <= last_tag as usize {
// distance from the 8-bit tag picks the payload width
663 match r_tag - first_tag as usize {
664 0 => doc_as_u8(r_doc) as u64,
665 1 => doc_as_u16(r_doc) as u64,
666 2 => doc_as_u32(r_doc) as u64,
667 3 => doc_as_u64(r_doc),
671 return Err(Expected(format!("expected EBML doc with tag {:?} through {:?} but \
677 if r_doc.end > self.parent.end {
678 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
683 self.pos = r_doc.end;
684 debug!("_next_int({:?}, {:?}) result={:?}", first_tag, last_tag, r);
688 pub fn read_opaque<R, F>(&mut self, op: F) -> DecodeResult<R>
689 where F: FnOnce(&mut opaque::Decoder, Doc) -> DecodeResult<R>
691 let doc = try!(self.next_doc(EsOpaque));
694 let mut opaque_decoder = opaque::Decoder::new(doc.data, doc.start);
695 try!(op(&mut opaque_decoder, doc))
701 pub fn position(&self) -> usize {
705 pub fn advance(&mut self, bytes: usize) {
710 impl<'doc> serialize::Decoder for Decoder<'doc> {
712 fn read_nil(&mut self) -> DecodeResult<()> {
716 fn read_u64(&mut self) -> DecodeResult<u64> {
717 self._next_int(EsU8, EsU64)
719 fn read_u32(&mut self) -> DecodeResult<u32> {
720 Ok(try!(self._next_int(EsU8, EsU32)) as u32)
722 fn read_u16(&mut self) -> DecodeResult<u16> {
723 Ok(try!(self._next_int(EsU8, EsU16)) as u16)
725 fn read_u8(&mut self) -> DecodeResult<u8> {
726 Ok(doc_as_u8(try!(self.next_doc(EsU8))))
728 fn read_uint(&mut self) -> DecodeResult<usize> {
729 let v = try!(self._next_int(EsU8, EsU64));
730 if v > (::std::usize::MAX as u64) {
731 Err(IntTooBig(v as usize))
737 fn read_i64(&mut self) -> DecodeResult<i64> {
738 Ok(try!(self._next_int(EsI8, EsI64)) as i64)
740 fn read_i32(&mut self) -> DecodeResult<i32> {
741 Ok(try!(self._next_int(EsI8, EsI32)) as i32)
743 fn read_i16(&mut self) -> DecodeResult<i16> {
744 Ok(try!(self._next_int(EsI8, EsI16)) as i16)
746 fn read_i8(&mut self) -> DecodeResult<i8> {
747 Ok(doc_as_u8(try!(self.next_doc(EsI8))) as i8)
749 fn read_int(&mut self) -> DecodeResult<isize> {
750 let v = try!(self._next_int(EsI8, EsI64)) as i64;
751 if v > (isize::MAX as i64) || v < (isize::MIN as i64) {
752 debug!("FIXME \\#6122: Removing this makes this function miscompile");
753 Err(IntTooBig(v as usize))
759 fn read_bool(&mut self) -> DecodeResult<bool> {
760 Ok(doc_as_u8(try!(self.next_doc(EsBool))) != 0)
763 fn read_f64(&mut self) -> DecodeResult<f64> {
764 let bits = doc_as_u64(try!(self.next_doc(EsF64)));
765 Ok(unsafe { transmute(bits) })
767 fn read_f32(&mut self) -> DecodeResult<f32> {
768 let bits = doc_as_u32(try!(self.next_doc(EsF32)));
769 Ok(unsafe { transmute(bits) })
771 fn read_char(&mut self) -> DecodeResult<char> {
772 Ok(char::from_u32(doc_as_u32(try!(self.next_doc(EsChar)))).unwrap())
774 fn read_str(&mut self) -> DecodeResult<String> {
775 Ok(try!(self.next_doc(EsStr)).as_str())
779 fn read_enum<T, F>(&mut self, name: &str, f: F) -> DecodeResult<T>
780 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
782 debug!("read_enum({})", name);
784 let doc = try!(self.next_doc(EsEnum));
786 let (old_parent, old_pos) = (self.parent, self.pos);
788 self.pos = self.parent.start;
790 let result = try!(f(self));
792 self.parent = old_parent;
797 fn read_enum_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
798 where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
800 debug!("read_enum_variant()");
801 let idx = try!(self._next_sub());
802 debug!(" idx={}", idx);
807 fn read_enum_variant_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
808 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
810 debug!("read_enum_variant_arg(idx={})", idx);
814 fn read_enum_struct_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
815 where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
817 debug!("read_enum_struct_variant()");
818 let idx = try!(self._next_sub());
819 debug!(" idx={}", idx);
824 fn read_enum_struct_variant_field<T, F>(&mut self,
829 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
831 debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx);
835 fn read_struct<T, F>(&mut self, name: &str, _: usize, f: F) -> DecodeResult<T>
836 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
838 debug!("read_struct(name={})", name);
842 fn read_struct_field<T, F>(&mut self, name: &str, idx: usize, f: F) -> DecodeResult<T>
843 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
845 debug!("read_struct_field(name={}, idx={})", name, idx);
849 fn read_tuple<T, F>(&mut self, tuple_len: usize, f: F) -> DecodeResult<T>
850 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
852 debug!("read_tuple()");
853 self.read_seq(move |d, len| {
854 if len == tuple_len {
857 Err(Expected(format!("Expected tuple of length `{}`, found tuple of length \
865 fn read_tuple_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
866 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
868 debug!("read_tuple_arg(idx={})", idx);
869 self.read_seq_elt(idx, f)
872 fn read_tuple_struct<T, F>(&mut self, name: &str, len: usize, f: F) -> DecodeResult<T>
873 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
875 debug!("read_tuple_struct(name={})", name);
876 self.read_tuple(len, f)
879 fn read_tuple_struct_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
880 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
882 debug!("read_tuple_struct_arg(idx={})", idx);
883 self.read_tuple_arg(idx, f)
886 fn read_option<T, F>(&mut self, mut f: F) -> DecodeResult<T>
887 where F: FnMut(&mut Decoder<'doc>, bool) -> DecodeResult<T>
889 debug!("read_option()");
890 self.read_enum("Option", move |this| {
891 this.read_enum_variant(&["None", "Some"], move |this, idx| {
895 _ => Err(Expected(format!("Expected None or Some"))),
901 fn read_seq<T, F>(&mut self, f: F) -> DecodeResult<T>
902 where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
904 debug!("read_seq()");
905 self.push_doc(EsVec, move |d| {
906 let len = try!(d._next_sub());
907 debug!(" len={}", len);
912 fn read_seq_elt<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
913 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
915 debug!("read_seq_elt(idx={})", idx);
916 self.push_doc(EsVecElt, f)
919 fn read_map<T, F>(&mut self, f: F) -> DecodeResult<T>
920 where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
922 debug!("read_map()");
923 self.push_doc(EsMap, move |d| {
924 let len = try!(d._next_sub());
925 debug!(" len={}", len);
930 fn read_map_elt_key<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
931 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
933 debug!("read_map_elt_key(idx={})", idx);
934 self.push_doc(EsMapKey, f)
937 fn read_map_elt_val<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
938 where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
940 debug!("read_map_elt_val(idx={})", idx);
941 self.push_doc(EsMapVal, f)
944 fn error(&mut self, err: &str) -> Error {
945 ApplicationError(err.to_string())
952 use std::io::prelude::*;
953 use std::io::{self, SeekFrom, Cursor};
956 use super::{EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16,
957 EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
958 EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS};
963 pub type EncodeResult = io::Result<()>;
966 pub struct Encoder<'a> {
967 pub writer: &'a mut Cursor<Vec<u8>>,
968 size_positions: Vec<u64>,
969 relax_limit: u64, // do not move encoded bytes before this position
/// Writes the tag `n` to `w`.
///
/// One visible arm emits `n` as a single byte. Tags in `0x100..NUM_TAGS` are
/// emitted as two bytes: `0xf0` ORed with the high bits of `n`, then the low
/// byte. Anything else yields an `io::Error` with the message "invalid tag".
///
/// NOTE(review): the condition guarding the single-byte arm is not visible in
/// this excerpt.
972 fn write_tag<W: Write>(w: &mut W, n: usize) -> EncodeResult {
974 w.write_all(&[n as u8])
975 } else if 0x100 <= n && n < NUM_TAGS {
// two-byte form: marker nibble 0xf plus bits 8..11, then bits 0..7
976 w.write_all(&[0xf0 | (n >> 8) as u8, n as u8])
978 Err(io::Error::new(io::ErrorKind::Other, &format!("invalid tag: {}", n)[..]))
/// Writes `n` as a variable-length unsigned integer that occupies exactly
/// `size` bytes (1 through 4).
///
/// The leading byte carries a width marker (`0x80`, `0x40`, `0x20` or `0x10`)
/// ORed with the highest bits of `n`; the remaining bytes follow in
/// big-endian order. Any other `size` yields an `io::Error`.
///
/// NOTE(review): the `as u8` casts truncate, and no visible line checks that
/// `n` fits in the payload bits of the requested width -- presumably the
/// caller guarantees it.
982 fn write_sized_vuint<W: Write>(w: &mut W, n: usize, size: usize) -> EncodeResult {
984 1 => w.write_all(&[0x80 | (n as u8)]),
985 2 => w.write_all(&[0x40 | ((n >> 8) as u8), n as u8]),
986 3 => w.write_all(&[0x20 | ((n >> 16) as u8), (n >> 8) as u8, n as u8]),
987 4 => w.write_all(&[0x10 | ((n >> 24) as u8), (n >> 16) as u8, (n >> 8) as u8, n as u8]),
988 _ => Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..])),
/// Writes `n` as a variable-length unsigned integer, trying the widths 1, 2,
/// 3 and 4 bytes in that order and returning on the first one chosen.
///
/// When none of the widths is chosen, an `io::Error` is returned.
///
/// NOTE(review): the thresholds that select each width are not visible in
/// this excerpt.
992 pub fn write_vuint<W: Write>(w: &mut W, n: usize) -> EncodeResult {
994 return write_sized_vuint(w, n, 1);
997 return write_sized_vuint(w, n, 2);
1000 return write_sized_vuint(w, n, 3);
1003 return write_sized_vuint(w, n, 4);
// no width was selected for `n`
1005 Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..]))
1008 impl<'a> Encoder<'a> {
1009 pub fn new(w: &'a mut Cursor<Vec<u8>>) -> Encoder<'a> {
1012 size_positions: vec![],
/// Opens a new explicitly-sized document with tag `tag_id`.
///
/// The tag is written, the current writer position is pushed onto
/// `size_positions`, and four zero bytes are written as a placeholder for the
/// size. `end_tag` pops that position to fill the size in.
///
/// # Panics
///
/// Panics if `tag_id` is below `NUM_IMPLICIT_TAGS`.
1017 pub fn start_tag(&mut self, tag_id: usize) -> EncodeResult {
1018 debug!("Start tag {:?}", tag_id);
1019 assert!(tag_id >= NUM_IMPLICIT_TAGS);
1021 // Write the enum ID:
1022 try!(write_tag(self.writer, tag_id));
1024 // Write a placeholder four-byte size.
// seeking by 0 from the current position just reports where we are
1025 let cur_pos = try!(self.writer.seek(SeekFrom::Current(0)));
1026 self.size_positions.push(cur_pos);
1027 let zeroes: &[u8] = &[0, 0, 0, 0];
1028 self.writer.write_all(zeroes)
/// Closes the most recently opened document by filling in its size.
///
/// The payload size is the distance from the saved size position to the
/// current position, minus the four placeholder bytes. Two strategies follow:
///
/// - Payloads of at most `RELAX_MAX_SIZE` bytes that start at or after
///   `relax_limit` are copied into a temporary buffer. The size is rewritten
///   with `write_vuint` and the payload is written back directly after it.
/// - Otherwise the placeholder is overwritten in place with a four-byte size
///   and the writer seeks back to where it was.
///
/// # Panics
///
/// Panics if there is no open tag (`size_positions` is empty).
1031 pub fn end_tag(&mut self) -> EncodeResult {
1032 let last_size_pos = self.size_positions.pop().unwrap();
1033 let cur_pos = try!(self.writer.seek(SeekFrom::Current(0)));
// rewind to the size placeholder
1034 try!(self.writer.seek(SeekFrom::Start(last_size_pos)));
// payload length, excluding the 4 placeholder bytes
1035 let size = (cur_pos - last_size_pos - 4) as usize;
1037 // relax the size encoding for small tags (bigger tags are costly to move).
1038 // we should never try to move the stable positions, however.
1039 const RELAX_MAX_SIZE: usize = 0x100;
1040 if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit {
1041 // we can't alter the buffer in place, so have a temporary buffer
1042 let mut buf = [0u8; RELAX_MAX_SIZE];
1044 let last_size_pos = last_size_pos as usize;
// the payload sits right after the 4-byte placeholder
1045 let data = &self.writer.get_ref()[last_size_pos + 4..cur_pos as usize];
1046 buf[..size].clone_from_slice(data);
1049 // overwrite the size and data and continue
1050 try!(write_vuint(self.writer, size));
1051 try!(self.writer.write_all(&buf[..size]));
1053 // overwrite the size with an overlong encoding and skip past the data
1054 try!(write_sized_vuint(self.writer, size, 4));
1055 try!(self.writer.seek(SeekFrom::Start(cur_pos)));
1058 debug!("End tag (size = {:?})", size);
1062 pub fn wr_tag<F>(&mut self, tag_id: usize, blk: F) -> EncodeResult
1063 where F: FnOnce() -> EncodeResult
1065 try!(self.start_tag(tag_id));
1070 pub fn wr_tagged_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
1071 assert!(tag_id >= NUM_IMPLICIT_TAGS);
1072 try!(write_tag(self.writer, tag_id));
1073 try!(write_vuint(self.writer, b.len()));
1074 self.writer.write_all(b)
/// Writes `v` as an explicitly-sized document tagged `tag_id`, using the
/// shortest big-endian representation.
///
/// Whole leading zero bytes are dropped before the payload is handed to
/// `wr_tagged_bytes`.
1077 pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
1078 let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
1079 // tagged integers are emitted in big-endian, with no
// leading zero bytes; `v == 0` therefore produces an empty payload.
1081 let leading_zero_bytes = v.leading_zeros() / 8;
1082 self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..])
1086 pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1087 self.wr_tagged_u64(tag_id, v as u64)
1091 pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1092 self.wr_tagged_u64(tag_id, v as u64)
1096 pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
1097 self.wr_tagged_bytes(tag_id, &[v])
1101 pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
1102 self.wr_tagged_u64(tag_id, v as u64)
1106 pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
1107 self.wr_tagged_u32(tag_id, v as u32)
1111 pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
1112 self.wr_tagged_u16(tag_id, v as u16)
1116 pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
1117 self.wr_tagged_bytes(tag_id, &[v as u8])
1120 pub fn wr_tagged_str(&mut self, tag_id: usize, v: &str) -> EncodeResult {
1121 self.wr_tagged_bytes(tag_id, v.as_bytes())
1124 // for auto-serialization
1125 fn wr_tagged_raw_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
1126 try!(write_tag(self.writer, tag_id));
1127 self.writer.write_all(b)
1130 fn wr_tagged_raw_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
1131 let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
1132 self.wr_tagged_raw_bytes(tag_id, &bytes)
1135 fn wr_tagged_raw_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1136 let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) };
1137 self.wr_tagged_raw_bytes(tag_id, &bytes)
1140 fn wr_tagged_raw_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1141 let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) };
1142 self.wr_tagged_raw_bytes(tag_id, &bytes)
1145 fn wr_tagged_raw_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
1146 self.wr_tagged_raw_bytes(tag_id, &[v])
1149 fn wr_tagged_raw_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
1150 self.wr_tagged_raw_u64(tag_id, v as u64)
1153 fn wr_tagged_raw_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
1154 self.wr_tagged_raw_u32(tag_id, v as u32)
1157 fn wr_tagged_raw_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
1158 self.wr_tagged_raw_u16(tag_id, v as u16)
1161 fn wr_tagged_raw_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
1162 self.wr_tagged_raw_bytes(tag_id, &[v as u8])
1165 pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult {
1166 debug!("Write {:?} bytes", b.len());
1167 self.writer.write_all(b)
1170 pub fn wr_str(&mut self, s: &str) -> EncodeResult {
1171 debug!("Write str: {:?}", s);
1172 self.writer.write_all(s.as_bytes())
1175 /// Returns the current position while marking it stable, i.e.
1176 /// generated bytes so far wouldn't be affected by relaxation.
///
/// `end_tag` only relaxes a size whose placeholder position is at or after
/// `relax_limit`, so raising the limit to the current position protects
/// everything written so far.
1177 pub fn mark_stable_position(&mut self) -> u64 {
1178 let pos = self.writer.seek(SeekFrom::Current(0)).unwrap();
// the limit only ever moves forward
1179 if self.relax_limit < pos {
1180 self.relax_limit = pos;
1186 impl<'a> Encoder<'a> {
1187 // used internally to emit things like the vector length and so on
/// Emits `v` as an `EsSub8` document when it fits in a `u8`, otherwise as an
/// `EsSub32` document when it fits in a `u32`.
///
/// Values that fit in neither produce an `io::Error`.
1188 fn _emit_tagged_sub(&mut self, v: usize) -> EncodeResult {
// a cast round-trip that preserves the value means `v` fits in the narrower type
1189 if v as u8 as usize == v {
1190 self.wr_tagged_raw_u8(EsSub8 as usize, v as u8)
1191 } else if v as u32 as usize == v {
1192 self.wr_tagged_raw_u32(EsSub32 as usize, v as u32)
1194 Err(io::Error::new(io::ErrorKind::Other,
1195 &format!("length or variant id too big: {}", v)[..]))
1199 pub fn emit_opaque<F>(&mut self, f: F) -> EncodeResult
1200 where F: FnOnce(&mut opaque::Encoder) -> EncodeResult
1202 try!(self.start_tag(EsOpaque as usize));
1205 let mut opaque_encoder = opaque::Encoder::new(self.writer);
1206 try!(f(&mut opaque_encoder));
1209 self.mark_stable_position();
/// `serialize::Encoder` implementation for the RBML `Encoder`.
///
/// Scalars are written through the `wr_tagged_raw_*` helpers under an `Es*`
/// tag, with integers first narrowed to the smallest width that represents
/// them exactly. Compound values open a tag via `start_tag` and/or record a
/// length or variant id through `_emit_tagged_sub`.
1214 impl<'a> serialize::Encoder for Encoder<'a> {
// Encoding failures are reported as `io::Error`.
1215 type Error = io::Error;
/// Encodes the nil/unit value; there is no payload argument.
1217 fn emit_nil(&mut self) -> EncodeResult {
/// Encodes a `usize` by widening it to `u64` and deferring to `emit_u64`.
1221 fn emit_uint(&mut self, v: usize) -> EncodeResult {
1222 self.emit_u64(v as u64)
/// Encodes a `u64`. Values that survive a round trip through `u32` are
/// handed to `emit_u32`; the raw 64-bit form under `EsU64` is for the rest.
1224 fn emit_u64(&mut self, v: u64) -> EncodeResult {
1225 if v as u32 as u64 == v {
1226 self.emit_u32(v as u32)
1228 self.wr_tagged_raw_u64(EsU64 as usize, v)
/// Encodes a `u32`. Values that survive a round trip through `u16` are
/// handed to `emit_u16`; the raw 32-bit form under `EsU32` is for the rest.
1231 fn emit_u32(&mut self, v: u32) -> EncodeResult {
1232 if v as u16 as u32 == v {
1233 self.emit_u16(v as u16)
1235 self.wr_tagged_raw_u32(EsU32 as usize, v)
/// Encodes a `u16`. Values that survive a round trip through `u8` are
/// handed to `emit_u8`; the raw 16-bit form under `EsU16` is for the rest.
1238 fn emit_u16(&mut self, v: u16) -> EncodeResult {
1239 if v as u8 as u16 == v {
1240 self.emit_u8(v as u8)
1242 self.wr_tagged_raw_u16(EsU16 as usize, v)
/// Encodes a `u8` as a single raw byte under the `EsU8` tag.
1245 fn emit_u8(&mut self, v: u8) -> EncodeResult {
1246 self.wr_tagged_raw_u8(EsU8 as usize, v)
/// Encodes an `isize` by widening it to `i64` and deferring to `emit_i64`.
1249 fn emit_int(&mut self, v: isize) -> EncodeResult {
1250 self.emit_i64(v as i64)
/// Encodes an `i64`. Values that survive a (sign-extending) round trip
/// through `i32` are handed to `emit_i32`; `EsI64` is for the rest.
1252 fn emit_i64(&mut self, v: i64) -> EncodeResult {
1253 if v as i32 as i64 == v {
1254 self.emit_i32(v as i32)
1256 self.wr_tagged_raw_i64(EsI64 as usize, v)
/// Encodes an `i32`. Values that survive a round trip through `i16` are
/// handed to `emit_i16`; `EsI32` is for the rest.
1259 fn emit_i32(&mut self, v: i32) -> EncodeResult {
1260 if v as i16 as i32 == v {
1261 self.emit_i16(v as i16)
1263 self.wr_tagged_raw_i32(EsI32 as usize, v)
/// Encodes an `i16`. Values that survive a round trip through `i8` are
/// handed to `emit_i8`; `EsI16` is for the rest.
1266 fn emit_i16(&mut self, v: i16) -> EncodeResult {
1267 if v as i8 as i16 == v {
1268 self.emit_i8(v as i8)
1270 self.wr_tagged_raw_i16(EsI16 as usize, v)
/// Encodes an `i8` as a single raw byte under the `EsI8` tag.
1273 fn emit_i8(&mut self, v: i8) -> EncodeResult {
1274 self.wr_tagged_raw_i8(EsI8 as usize, v)
/// Encodes a `bool` as one byte (`v as u8`, i.e. 0 or 1) under `EsBool`.
1277 fn emit_bool(&mut self, v: bool) -> EncodeResult {
1278 self.wr_tagged_raw_u8(EsBool as usize, v as u8)
/// Encodes an `f64` by reinterpreting its bits and writing them with
/// `wr_tagged_raw_u64` under `EsF64`.
1281 fn emit_f64(&mut self, v: f64) -> EncodeResult {
// Bit-for-bit reinterpretation, not a numeric conversion; the target type
// is inferred from the `wr_tagged_raw_u64` argument below.
1282 let bits = unsafe { mem::transmute(v) };
1283 self.wr_tagged_raw_u64(EsF64 as usize, bits)
/// Encodes an `f32` by reinterpreting its bits and writing them with
/// `wr_tagged_raw_u32` under `EsF32`.
1285 fn emit_f32(&mut self, v: f32) -> EncodeResult {
// Bit-for-bit reinterpretation, not a numeric conversion; the target type
// is inferred from the `wr_tagged_raw_u32` argument below.
1286 let bits = unsafe { mem::transmute(v) };
1287 self.wr_tagged_raw_u32(EsF32 as usize, bits)
/// Encodes a `char` as its `u32` value under the `EsChar` tag.
1289 fn emit_char(&mut self, v: char) -> EncodeResult {
1290 self.wr_tagged_raw_u32(EsChar as usize, v as u32)
/// Encodes a string slice via `wr_tagged_str` under the `EsStr` tag.
1293 fn emit_str(&mut self, v: &str) -> EncodeResult {
1294 self.wr_tagged_str(EsStr as usize, v)
/// Begins an enum by opening an `EsEnum` tag. The enum name is not used.
1297 fn emit_enum<F>(&mut self, _name: &str, f: F) -> EncodeResult
1298 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1300 try!(self.start_tag(EsEnum as usize));
/// Records the variant id `v_id` through `_emit_tagged_sub`. The variant
/// name and the argument count are ignored.
1305 fn emit_enum_variant<F>(&mut self, _: &str, v_id: usize, _: usize, f: F) -> EncodeResult
1306 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1308 try!(self._emit_tagged_sub(v_id));
/// Encodes one argument of an enum variant. The argument index is ignored.
1312 fn emit_enum_variant_arg<F>(&mut self, _: usize, f: F) -> EncodeResult
1313 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
/// Struct-like variants are encoded exactly like tuple-like ones: this
/// forwards its name, id and count to `emit_enum_variant`.
1318 fn emit_enum_struct_variant<F>(&mut self,
1324 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1326 self.emit_enum_variant(v_name, v_id, cnt, f)
/// Forwards to `emit_enum_variant_arg` with `idx`; the field name is ignored.
1329 fn emit_enum_struct_variant_field<F>(&mut self, _: &str, idx: usize, f: F) -> EncodeResult
1330 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1332 self.emit_enum_variant_arg(idx, f)
/// Encodes a struct. Neither the struct name nor `_len` is used.
1335 fn emit_struct<F>(&mut self, _: &str, _len: usize, f: F) -> EncodeResult
1336 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
/// Encodes a struct field. Neither the field name nor its index is used.
1341 fn emit_struct_field<F>(&mut self, _name: &str, _: usize, f: F) -> EncodeResult
1342 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
/// Tuples are encoded as sequences: forwards to `emit_seq`.
1347 fn emit_tuple<F>(&mut self, len: usize, f: F) -> EncodeResult
1348 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1350 self.emit_seq(len, f)
/// Tuple elements are encoded as sequence elements: forwards to `emit_seq_elt`.
1352 fn emit_tuple_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
1353 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1355 self.emit_seq_elt(idx, f)
/// Tuple structs are encoded as sequences; the struct name is ignored.
1358 fn emit_tuple_struct<F>(&mut self, _: &str, len: usize, f: F) -> EncodeResult
1359 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1361 self.emit_seq(len, f)
/// Tuple-struct fields are encoded as sequence elements.
1363 fn emit_tuple_struct_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
1364 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1366 self.emit_seq_elt(idx, f)
/// Options are encoded as an enum: forwards to `emit_enum` with the name
/// "Option" (which `emit_enum` does not use).
1369 fn emit_option<F>(&mut self, f: F) -> EncodeResult
1370 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1372 self.emit_enum("Option", f)
/// `None` is variant id 0 with zero arguments and a no-op payload closure.
1374 fn emit_option_none(&mut self) -> EncodeResult {
1375 self.emit_enum_variant("None", 0, 0, |_| Ok(()))
/// `Some` is variant id 1 with one argument; `f` encodes the payload.
1377 fn emit_option_some<F>(&mut self, f: F) -> EncodeResult
1378 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1381 self.emit_enum_variant("Some", 1, 1, f)
/// Encodes a sequence of `len` elements under the `EsVec` tag.
///
/// The empty case is written as an `EsVec` tag with an empty byte payload;
/// otherwise the tag is opened and `len` is recorded via `_emit_tagged_sub`.
1384 fn emit_seq<F>(&mut self, len: usize, f: F) -> EncodeResult
1385 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1388 // empty vector optimization
1389 return self.wr_tagged_bytes(EsVec as usize, &[]);
// General path: open the tag, then write the element count.
1392 try!(self.start_tag(EsVec as usize));
1393 try!(self._emit_tagged_sub(len));
/// Encodes one sequence element inside its own `EsVecElt` tag. The element
/// index is not used.
1398 fn emit_seq_elt<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1399 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1402 try!(self.start_tag(EsVecElt as usize));
/// Encodes a map of `len` entries under the `EsMap` tag.
///
/// The empty case is written as an `EsMap` tag with an empty byte payload;
/// otherwise the tag is opened and `len` is recorded via `_emit_tagged_sub`.
1407 fn emit_map<F>(&mut self, len: usize, f: F) -> EncodeResult
1408 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1411 // empty map optimization
1412 return self.wr_tagged_bytes(EsMap as usize, &[]);
// General path: open the tag, then write the entry count.
1415 try!(self.start_tag(EsMap as usize));
1416 try!(self._emit_tagged_sub(len));
/// Encodes a map key inside its own `EsMapKey` tag. The index is not used.
1421 fn emit_map_elt_key<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1422 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1425 try!(self.start_tag(EsMapKey as usize));
/// Encodes a map value inside its own `EsMapVal` tag. The index is not used.
1430 fn emit_map_elt_val<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1431 where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1433 try!(self.start_tag(EsMapVal as usize));
1440 // ___________________________________________________________________________
1445 use super::{Doc, reader, writer};
1447 use serialize::{Encodable, Decodable};
1449 use std::io::Cursor;
/// Exercises `reader::vuint_at` by walking a buffer of encoded lengths,
/// asserting for each entry both the decoded value (`val`) and the offset of
/// the following entry (`next`). Each width is checked at its minimum (0)
/// and at its maximum value.
1452 fn test_vuint_at() {
// Four-byte form: the encodings of 0 and of (1 << 28) - 1.
1460 0x10, 0x00, 0x00, 0x00,
1461 0x1f, 0xff, 0xff, 0xff
1464 let mut res: reader::Res;
// One-byte entries: offsets advance 0 -> 1 -> 2.
1467 res = reader::vuint_at(data, 0).unwrap();
1468 assert_eq!(res.val, 0);
1469 assert_eq!(res.next, 1);
1470 res = reader::vuint_at(data, res.next).unwrap();
1471 assert_eq!(res.val, (1 << 7) - 1);
1472 assert_eq!(res.next, 2);
// Two-byte entries: offsets advance 2 -> 4 -> 6.
1475 res = reader::vuint_at(data, res.next).unwrap();
1476 assert_eq!(res.val, 0);
1477 assert_eq!(res.next, 4);
1478 res = reader::vuint_at(data, res.next).unwrap();
1479 assert_eq!(res.val, (1 << 14) - 1);
1480 assert_eq!(res.next, 6);
// Three-byte entries: offsets advance 6 -> 9 -> 12.
1483 res = reader::vuint_at(data, res.next).unwrap();
1484 assert_eq!(res.val, 0);
1485 assert_eq!(res.next, 9);
1486 res = reader::vuint_at(data, res.next).unwrap();
1487 assert_eq!(res.val, (1 << 21) - 1);
1488 assert_eq!(res.next, 12);
// Four-byte entries: offsets advance 12 -> 16 -> 20.
1491 res = reader::vuint_at(data, res.next).unwrap();
1492 assert_eq!(res.val, 0);
1493 assert_eq!(res.next, 16);
1494 res = reader::vuint_at(data, res.next).unwrap();
1495 assert_eq!(res.val, (1 << 28) - 1);
1496 assert_eq!(res.next, 20);
/// Round-trip test for `Option<isize>`: encode with `writer::Encoder` into an
/// in-memory buffer, then decode the written bytes with `reader::Decoder`.
1500 fn test_option_int() {
// Helper performing one encode/decode round trip for a single value.
1501 fn test_v(v: Option<isize>) {
1502 debug!("v == {:?}", v);
// The encoder writes into a `Cursor` over a growable `Vec`.
1503 let mut wr = Cursor::new(Vec::new());
1505 let mut rbml_w = writer::Encoder::new(&mut wr);
// NOTE(review): the encode result is discarded here, so an encoding error
// would only surface indirectly when decoding below -- confirm this is
// intentional.
1506 let _ = v.encode(&mut rbml_w);
// Decode straight from the bytes accumulated in the cursor.
1508 let rbml_doc = Doc::new(wr.get_ref());
1509 let mut deser = reader::Decoder::new(rbml_doc);
1510 let v1 = Decodable::decode(&mut deser).unwrap();
1511 debug!("v1 == {:?}", v1);
1523 #![allow(non_snake_case)]
/// Benchmark for `reader::vuint_at`: collects a `Vec` generated from the
/// range `0..4 * 100` (400 items), then scans it, adding each decoded `val`
/// to a running sum.
1528 pub fn vuint_at_A_aligned(b: &mut Bencher) {
1529 let data = (0..4 * 100)
1536 .collect::<Vec<_>>();
// Decode at offset `i` until the end of the data is reached.
1540 while i < data.len() {
1541 sum += reader::vuint_at(&data, i).unwrap().val;
/// Benchmark for `reader::vuint_at`: like the aligned variant, but the data is
/// generated from `0..4 * 100 + 1` (401 items, one more than the aligned case).
// NOTE(review): presumably the extra item shifts the decoded entries off a
// 4-byte boundary -- confirm against the full body.
1548 pub fn vuint_at_A_unaligned(b: &mut Bencher) {
1549 let data = (0..4 * 100 + 1)
1556 .collect::<Vec<_>>();
// Decode at offset `i` until the end of the data is reached.
1560 while i < data.len() {
1561 sum += reader::vuint_at(&data, i).unwrap().val;
/// Benchmark for `reader::vuint_at`: collects a `Vec` generated from the
/// range `0..4 * 100` (400 items), then scans it, adding each decoded `val`
/// to a running sum.
1568 pub fn vuint_at_D_aligned(b: &mut Bencher) {
1569 let data = (0..4 * 100)
1577 .collect::<Vec<_>>();
// Decode at offset `i` until the end of the data is reached.
1581 while i < data.len() {
1582 sum += reader::vuint_at(&data, i).unwrap().val;
/// Benchmark for `reader::vuint_at`: like the aligned variant, but the data is
/// generated from `0..4 * 100 + 1` (401 items, one more than the aligned case).
// NOTE(review): presumably the extra item shifts the decoded entries off a
// 4-byte boundary -- confirm against the full body.
1589 pub fn vuint_at_D_unaligned(b: &mut Bencher) {
1590 let data = (0..4 * 100 + 1)
1598 .collect::<Vec<_>>();
// Decode at offset `i` until the end of the data is reached.
1602 while i < data.len() {
1603 sum += reader::vuint_at(&data, i).unwrap().val;