src/librbml/lib.rs

   1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! Really Bad Markup Language (rbml) is an internal serialization format of rustc.
  12 //! This is not intended to be used by users.
  13 //!
  14 //! Originally based on the Extensible Binary Markup Language
  15 //! (ebml; http://www.matroska.org/technical/specs/rfc/index.html),
  16 //! it is now a separate format tuned for the rust object metadata.
  17 //!
  18 //! # Encoding
  19 //!
  20 //! RBML document consists of the tag, length and data.
  21 //! The encoded data can contain multiple RBML documents concatenated.
  22 //!
  23 //! **Tags** are a hint for the following data.
  24 //! Tags are a number from 0x000 to 0xfff, where 0xf0 through 0xff is reserved.
  25 //! Tags less than 0xf0 are encoded in one literal byte.
  26 //! Tags greater than 0xff are encoded in two big-endian bytes,
  27 //! where the tag number is ORed with 0xf000. (E.g. tag 0x123 = `f1 23`)
  28 //!
  29 //! **Lengths** encode the length of the following data.
//! It is a variable-length unsigned integer, in one of the following forms:
  31 //!
  32 //! - `80` through `fe` for lengths up to 0x7e;
  33 //! - `40 ff` through `7f ff` for lengths up to 0x3fff;
  34 //! - `20 40 00` through `3f ff ff` for lengths up to 0x1fffff;
  35 //! - `10 20 00 00` through `1f ff ff ff` for lengths up to 0xfffffff.
  36 //!
  37 //! The "overlong" form is allowed so that the length can be encoded
  38 //! without the prior knowledge of the encoded data.
  39 //! For example, the length 0 can be represented either by `80`, `40 00`,
  40 //! `20 00 00` or `10 00 00 00`.
  41 //! The encoder tries to minimize the length if possible.
  42 //! Also, some predefined tags listed below are so commonly used that
  43 //! their lengths are omitted ("implicit length").
  44 //!
  45 //! **Data** can be either binary bytes or zero or more nested RBML documents.
  46 //! Nested documents cannot overflow, and should be entirely contained
  47 //! within a parent document.
  48 //!
  49 //! # Predefined Tags
  50 //!
  51 //! Most RBML tags are defined by the application.
  52 //! (For the rust object metadata, see also `rustc::metadata::common`.)
  53 //! RBML itself does define a set of predefined tags however,
  54 //! intended for the auto-serialization implementation.
  55 //!
  56 //! Predefined tags with an implicit length:
  57 //!
  58 //! - `U8`  (`00`): 1-byte unsigned integer.
  59 //! - `U16` (`01`): 2-byte big endian unsigned integer.
  60 //! - `U32` (`02`): 4-byte big endian unsigned integer.
  61 //! - `U64` (`03`): 8-byte big endian unsigned integer.
  62 //!   Any of `U*` tags can be used to encode primitive unsigned integer types,
  63 //!   as long as it is no greater than the actual size.
  64 //!   For example, `u8` can only be represented via the `U8` tag.
  65 //!
  66 //! - `I8`  (`04`): 1-byte signed integer.
  67 //! - `I16` (`05`): 2-byte big endian signed integer.
  68 //! - `I32` (`06`): 4-byte big endian signed integer.
  69 //! - `I64` (`07`): 8-byte big endian signed integer.
  70 //!   Similar to `U*` tags. Always uses two's complement encoding.
  71 //!
  72 //! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true.
  73 //!
  74 //! - `Char` (`09`): 4-byte big endian Unicode scalar value.
  75 //!   Surrogate pairs or out-of-bound values are invalid.
  76 //!
  77 //! - `F32` (`0a`): 4-byte big endian unsigned integer representing
  78 //!   IEEE 754 binary32 floating-point format.
  79 //! - `F64` (`0b`): 8-byte big endian unsigned integer representing
  80 //!   IEEE 754 binary64 floating-point format.
  81 //!
  82 //! - `Sub8`  (`0c`): 1-byte unsigned integer for supplementary information.
  83 //! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information.
  84 //!   Those two tags normally occur as the first subdocument of certain tags,
  85 //!   namely `Enum`, `Vec` and `Map`, to provide a variant or size information.
  86 //!   They can be used interchangeably.
  87 //!
  88 //! Predefined tags with an explicit length:
  89 //!
  90 //! - `Str` (`10`): A UTF-8-encoded string.
  91 //!
  92 //! - `Enum` (`11`): An enum.
  93 //!   The first subdocument should be `Sub*` tags with a variant ID.
  94 //!   Subsequent subdocuments, if any, encode variant arguments.
  95 //!
  96 //! - `Vec` (`12`): A vector (sequence).
  97 //! - `VecElt` (`13`): A vector element.
  98 //!   The first subdocument should be `Sub*` tags with the number of elements.
  99 //!   Subsequent subdocuments should be `VecElt` tag per each element.
 100 //!
//! - `Map` (`14`): A map (associative array).
 102 //! - `MapKey` (`15`): A key part of the map entry.
 103 //! - `MapVal` (`16`): A value part of the map entry.
 104 //!   The first subdocument should be `Sub*` tags with the number of entries.
 105 //!   Subsequent subdocuments should be an alternating sequence of
 106 //!   `MapKey` and `MapVal` tags per each entry.
 107 //!
 108 //! - `Opaque` (`17`): An opaque, custom-format tag.
 109 //!   Used to wrap ordinary custom tags or data in the auto-serialized context.
//!   Rustc typically uses this to encode type information.
 111 //!
 112 //! First 0x20 tags are reserved by RBML; custom tags start at 0x20.
 113
 114 #![crate_name = "rbml"]
 115 #![unstable(feature = "rustc_private", issue = "27812")]
 116 #![crate_type = "rlib"]
 117 #![crate_type = "dylib"]
 118 #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
 119        html_favicon_url = "https://doc.rust-lang.org/favicon.ico",
 120        html_root_url = "https://doc.rust-lang.org/nightly/",
 121        html_playground_url = "https://play.rust-lang.org/",
 122        test(attr(deny(warnings))))]
 123 #![cfg_attr(not(stage0), deny(warnings))]
 124
 125 #![feature(rustc_private)]
 126 #![feature(staged_api)]
 127
 128 #![cfg_attr(test, feature(test))]
 129
 130 extern crate serialize;
 131
 132 #[cfg(test)]
 133 extern crate serialize as rustc_serialize; // Used by RustcEncodable
 134
 135 #[macro_use]
 136 extern crate log;
 137
 138 #[cfg(test)]
 139 extern crate test;
 140
 141 pub mod opaque;
 142 pub mod leb128;
 143
 144 pub use self::EbmlEncoderTag::*;
 145 pub use self::Error::*;
 146
 147 use std::str;
 148 use std::fmt;
 149
/// A view over the byte range `start..end` of an RBML buffer.
///
/// `data` is the whole underlying buffer; `start` and `end` are offsets into
/// that buffer rather than into a sub-slice.
#[derive(Clone, Copy)]
pub struct Doc<'a> {
    /// The buffer this document points into.
    pub data: &'a [u8],
    /// Offset in `data` of the first byte of the document.
    pub start: usize,
    /// Offset in `data` one past the last byte of the document.
    pub end: usize,
}
 157
 158 impl<'doc> Doc<'doc> {
 159     pub fn new(data: &'doc [u8]) -> Doc<'doc> {
 160         Doc {
 161             data: data,
 162             start: 0,
 163             end: data.len(),
 164         }
 165     }
 166
 167     pub fn get<'a>(&'a self, tag: usize) -> Doc<'a> {
 168         reader::get_doc(*self, tag)
 169     }
 170
 171     pub fn is_empty(&self) -> bool {
 172         self.start == self.end
 173     }
 174
 175     pub fn as_str_slice<'a>(&'a self) -> &'a str {
 176         str::from_utf8(&self.data[self.start..self.end]).unwrap()
 177     }
 178
 179     pub fn as_str(&self) -> String {
 180         self.as_str_slice().to_string()
 181     }
 182 }
 183
/// A document together with the tag it was encoded under.
pub struct TaggedDoc<'a> {
    /// Tag number read from the encoded data.
    tag: usize,
    /// The document's payload range.
    pub doc: Doc<'a>,
}
 188
/// Tags predefined by RBML for auto-serialization.
///
/// Each discriminant is the tag number as it appears in the encoded data; the
/// decoder compares `tag as usize` against the number it read.
#[derive(Copy, Clone, Debug)]
pub enum EbmlEncoderTag {
    // tags 00..1f are reserved for auto-serialization.
    // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded.
    EsU8 = 0x00, // + 1 byte
    EsU16 = 0x01, // + 2 bytes
    EsU32 = 0x02, // + 4 bytes
    EsU64 = 0x03, // + 8 bytes
    EsI8 = 0x04, // + 1 byte
    EsI16 = 0x05, // + 2 bytes
    EsI32 = 0x06, // + 4 bytes
    EsI64 = 0x07, // + 8 bytes
    EsBool = 0x08, // + 1 byte
    EsChar = 0x09, // + 4 bytes
    EsF32 = 0x0a, // + 4 bytes
    EsF64 = 0x0b, // + 8 bytes
    EsSub8 = 0x0c, // + 1 byte
    EsSub32 = 0x0d, // + 4 bytes
    // 0x0e and 0x0f are reserved
    EsStr = 0x10,
    EsEnum = 0x11, // encodes the variant id as the first EsSub*
    EsVec = 0x12, // encodes the # of elements as the first EsSub*
    EsVecElt = 0x13,
    EsMap = 0x14, // encodes the # of pairs as the first EsSub*
    EsMapKey = 0x15,
    EsMapVal = 0x16,
    EsOpaque = 0x17,
}
 217
// Presumably the number of distinct tag values (the module docs give the tag
// range as 0x000 through 0xfff); its use is not visible in this part of the
// file -- TODO confirm.
const NUM_TAGS: usize = 0x1000;
// Number of leading tags whose payload length is implied by the tag itself.
const NUM_IMPLICIT_TAGS: usize = 0x0e;

// Payload length in bytes of each implicitly sized tag, indexed by tag number.
// `reader::tag_len_at` treats a negative entry as "length is encoded
// explicitly"; no entry is currently negative.
#[cfg_attr(rustfmt, rustfmt_skip)]
static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [
    1, 2, 4, 8, // EsU*
    1, 2, 4, 8, // EsI*
    1, // EsBool
    4, // EsChar
    4, 8, // EsF*
    1, 4, // EsSub*
];
 230
/// Errors reported by this crate.
#[derive(Debug)]
pub enum Error {
    /// An integer could not be represented: either the first byte of a
    /// variable-length integer matched no known form (the payload is that
    /// byte), or a decoded value does not fit in `usize`/`isize`.
    IntTooBig(usize),
    /// A tag started with the prohibited overlong prefix byte; the payload is
    /// that byte.
    InvalidTag(usize),
    /// The data did not have the structure the decoder expected; the payload
    /// is a human-readable description.
    Expected(String),
    /// Wraps an I/O error. Not produced by the code visible in this part of
    /// the file; presumably used by the writer -- TODO confirm.
    IoError(std::io::Error),
    /// An error message supplied by the application. Not produced by the code
    /// visible in this part of the file -- TODO confirm where it is raised.
    ApplicationError(String),
}
 239
// `Display` currently forwards to the derived `Debug` output, so both
// renderings of an `Error` are identical.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // FIXME: this should be a more useful display form
        fmt::Debug::fmt(self, f)
    }
}
 246 // --------------------------------------
 247
 248 pub mod reader {
 249     use std::char;
 250
 251     use std::isize;
 252     use std::mem::transmute;
 253
 254     use serialize;
 255
 256     use super::opaque;
 257     use super::{ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey,
 258                 EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32,
 259                 EsChar, EsStr, EsMapVal, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error,
 260                 IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN};
 261
    /// Result type returned by the reading functions in this module.
    pub type DecodeResult<T> = Result<T, Error>;
 263     // rbml reading
 264
    // Evaluates `$e`. On `Ok` the macro yields the inner value; on `Err` it
    // logs the error at debug level and returns `$r` from the enclosing
    // function, so the error itself is discarded.
    macro_rules! try_or {
        ($e:expr, $r:expr) => (
            match $e {
                Ok(e) => e,
                Err(e) => {
                    debug!("ignored error: {:?}", e);
                    return $r
                }
            }
        )
    }
 276
    /// A decoded number together with the offset at which decoding stopped.
    #[derive(Copy, Clone)]
    pub struct Res {
        /// The decoded value (a tag number or a length).
        pub val: usize,
        /// Offset of the first byte after the decoded item.
        pub next: usize,
    }
 282
 283     pub fn tag_at(data: &[u8], start: usize) -> DecodeResult<Res> {
 284         let v = data[start] as usize;
 285         if v < 0xf0 {
 286             Ok(Res {
 287                 val: v,
 288                 next: start + 1,
 289             })
 290         } else if v > 0xf0 {
 291             Ok(Res {
 292                 val: ((v & 0xf) << 8) | data[start + 1] as usize,
 293                 next: start + 2,
 294             })
 295         } else {
 296             // every tag starting with byte 0xf0 is an overlong form, which is prohibited.
 297             Err(InvalidTag(v))
 298         }
 299     }
 300
 301     #[inline(never)]
 302     fn vuint_at_slow(data: &[u8], start: usize) -> DecodeResult<Res> {
 303         let a = data[start];
 304         if a & 0x80 != 0 {
 305             return Ok(Res {
 306                 val: (a & 0x7f) as usize,
 307                 next: start + 1,
 308             });
 309         }
 310         if a & 0x40 != 0 {
 311             return Ok(Res {
 312                 val: ((a & 0x3f) as usize) << 8 | (data[start + 1] as usize),
 313                 next: start + 2,
 314             });
 315         }
 316         if a & 0x20 != 0 {
 317             return Ok(Res {
 318                 val: ((a & 0x1f) as usize) << 16 | (data[start + 1] as usize) << 8 |
 319                      (data[start + 2] as usize),
 320                 next: start + 3,
 321             });
 322         }
 323         if a & 0x10 != 0 {
 324             return Ok(Res {
 325                 val: ((a & 0x0f) as usize) << 24 | (data[start + 1] as usize) << 16 |
 326                      (data[start + 2] as usize) << 8 |
 327                      (data[start + 3] as usize),
 328                 next: start + 4,
 329             });
 330         }
 331         Err(IntTooBig(a as usize))
 332     }
 333
 334     pub fn vuint_at(data: &[u8], start: usize) -> DecodeResult<Res> {
 335         if data.len() - start < 4 {
 336             return vuint_at_slow(data, start);
 337         }
 338
 339         // Lookup table for parsing EBML Element IDs as per
 340         // http://ebml.sourceforge.net/specs/ The Element IDs are parsed by
 341         // reading a big endian u32 positioned at data[start].  Using the four
 342         // most significant bits of the u32 we lookup in the table below how
 343         // the element ID should be derived from it.
 344         //
 345         // The table stores tuples (shift, mask) where shift is the number the
 346         // u32 should be right shifted with and mask is the value the right
 347         // shifted value should be masked with.  If for example the most
 348         // significant bit is set this means it's a class A ID and the u32
 349         // should be right shifted with 24 and masked with 0x7f. Therefore we
 350         // store (24, 0x7f) at index 0x8 - 0xF (four bit numbers where the most
 351         // significant bit is set).
 352         //
 353         // By storing the number of shifts and masks in a table instead of
 354         // checking in order if the most significant bit is set, the second
 355         // most significant bit is set etc. we can replace up to three
 356         // "and+branch" with a single table lookup which gives us a measured
 357         // speedup of around 2x on x86_64.
 358         static SHIFT_MASK_TABLE: [(usize, u32); 16] = [(0, 0x0),
 359                                                        (0, 0x0fffffff),
 360                                                        (8, 0x1fffff),
 361                                                        (8, 0x1fffff),
 362                                                        (16, 0x3fff),
 363                                                        (16, 0x3fff),
 364                                                        (16, 0x3fff),
 365                                                        (16, 0x3fff),
 366                                                        (24, 0x7f),
 367                                                        (24, 0x7f),
 368                                                        (24, 0x7f),
 369                                                        (24, 0x7f),
 370                                                        (24, 0x7f),
 371                                                        (24, 0x7f),
 372                                                        (24, 0x7f),
 373                                                        (24, 0x7f)];
 374
 375         unsafe {
 376             let ptr = data.as_ptr().offset(start as isize) as *const u32;
 377             let val = u32::from_be(*ptr);
 378
 379             let i = (val >> 28) as usize;
 380             let (shift, mask) = SHIFT_MASK_TABLE[i];
 381             Ok(Res {
 382                 val: ((val >> shift) & mask) as usize,
 383                 next: start + ((32 - shift) >> 3),
 384             })
 385         }
 386     }
 387
 388     pub fn tag_len_at(data: &[u8], tag: Res) -> DecodeResult<Res> {
 389         if tag.val < NUM_IMPLICIT_TAGS && TAG_IMPLICIT_LEN[tag.val] >= 0 {
 390             Ok(Res {
 391                 val: TAG_IMPLICIT_LEN[tag.val] as usize,
 392                 next: tag.next,
 393             })
 394         } else {
 395             vuint_at(data, tag.next)
 396         }
 397     }
 398
 399     pub fn doc_at<'a>(data: &'a [u8], start: usize) -> DecodeResult<TaggedDoc<'a>> {
 400         let elt_tag = try!(tag_at(data, start));
 401         let elt_size = try!(tag_len_at(data, elt_tag));
 402         let end = elt_size.next + elt_size.val;
 403         Ok(TaggedDoc {
 404             tag: elt_tag.val,
 405             doc: Doc {
 406                 data: data,
 407                 start: elt_size.next,
 408                 end: end,
 409             },
 410         })
 411     }
 412
 413     pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: usize) -> Option<Doc<'a>> {
 414         let mut pos = d.start;
 415         while pos < d.end {
 416             let elt_tag = try_or!(tag_at(d.data, pos), None);
 417             let elt_size = try_or!(tag_len_at(d.data, elt_tag), None);
 418             pos = elt_size.next + elt_size.val;
 419             if elt_tag.val == tg {
 420                 return Some(Doc {
 421                     data: d.data,
 422                     start: elt_size.next,
 423                     end: pos,
 424                 });
 425             }
 426         }
 427         None
 428     }
 429
 430     pub fn get_doc<'a>(d: Doc<'a>, tg: usize) -> Doc<'a> {
 431         match maybe_get_doc(d, tg) {
 432             Some(d) => d,
 433             None => {
 434                 error!("failed to find block with tag {:?}", tg);
 435                 panic!();
 436             }
 437         }
 438     }
 439
    /// Returns an iterator over the children of `d`, yielding each child's tag
    /// and payload.
    pub fn docs<'a>(d: Doc<'a>) -> DocsIterator<'a> {
        DocsIterator { d: d }
    }
 443
    /// Iterator over the children of a document; created by `docs`.
    pub struct DocsIterator<'a> {
        // The part of the parent not yet visited: `d.start` advances past each
        // child as it is yielded.
        d: Doc<'a>,
    }
 447
 448     impl<'a> Iterator for DocsIterator<'a> {
 449         type Item = (usize, Doc<'a>);
 450
 451         fn next(&mut self) -> Option<(usize, Doc<'a>)> {
 452             if self.d.start >= self.d.end {
 453                 return None;
 454             }
 455
 456             let elt_tag = try_or!(tag_at(self.d.data, self.d.start), {
 457                 self.d.start = self.d.end;
 458                 None
 459             });
 460             let elt_size = try_or!(tag_len_at(self.d.data, elt_tag), {
 461                 self.d.start = self.d.end;
 462                 None
 463             });
 464
 465             let end = elt_size.next + elt_size.val;
 466             let doc = Doc {
 467                 data: self.d.data,
 468                 start: elt_size.next,
 469                 end: end,
 470             };
 471
 472             self.d.start = end;
 473             return Some((elt_tag.val, doc));
 474         }
 475     }
 476
    /// Returns an iterator over the children of `d` whose tag equals `tag`.
    pub fn tagged_docs<'a>(d: Doc<'a>, tag: usize) -> TaggedDocsIterator<'a> {
        TaggedDocsIterator {
            iter: docs(d),
            tag: tag,
        }
    }
 483
    /// Iterator over the children of a document that carry one particular tag;
    /// created by `tagged_docs`.
    pub struct TaggedDocsIterator<'a> {
        // Underlying iterator over all children.
        iter: DocsIterator<'a>,
        // Only children with this tag are yielded.
        tag: usize,
    }
 488
 489     impl<'a> Iterator for TaggedDocsIterator<'a> {
 490         type Item = Doc<'a>;
 491
 492         fn next(&mut self) -> Option<Doc<'a>> {
 493             while let Some((tag, doc)) = self.iter.next() {
 494                 if tag == self.tag {
 495                     return Some(doc);
 496                 }
 497             }
 498             None
 499         }
 500     }
 501
 502     pub fn with_doc_data<T, F>(d: Doc, f: F) -> T
 503         where F: FnOnce(&[u8]) -> T
 504     {
 505         f(&d.data[d.start..d.end])
 506     }
 507
 508     pub fn doc_as_u8(d: Doc) -> u8 {
 509         assert_eq!(d.end, d.start + 1);
 510         d.data[d.start]
 511     }
 512
 513     pub fn doc_as_u64(d: Doc) -> u64 {
 514         if d.end >= 8 {
 515             // For performance, we read 8 big-endian bytes,
 516             // and mask off the junk if there is any. This
 517             // obviously won't work on the first 8 bytes
 518             // of a file - we will fall of the start
 519             // of the page and segfault.
 520
 521             let mut b = [0; 8];
 522             b.clone_from_slice(&d.data[d.end - 8..d.end]);
 523             let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() };
 524             let len = d.end - d.start;
 525             if len < 8 {
 526                 data & ((1 << (len * 8)) - 1)
 527             } else {
 528                 data
 529             }
 530         } else {
 531             let mut result = 0;
 532             for b in &d.data[d.start..d.end] {
 533                 result = (result << 8) + (*b as u64);
 534             }
 535             result
 536         }
 537     }
 538
 539     #[inline]
 540     pub fn doc_as_u16(d: Doc) -> u16 {
 541         doc_as_u64(d) as u16
 542     }
 543     #[inline]
 544     pub fn doc_as_u32(d: Doc) -> u32 {
 545         doc_as_u64(d) as u32
 546     }
 547
 548     #[inline]
 549     pub fn doc_as_i8(d: Doc) -> i8 {
 550         doc_as_u8(d) as i8
 551     }
 552     #[inline]
 553     pub fn doc_as_i16(d: Doc) -> i16 {
 554         doc_as_u16(d) as i16
 555     }
 556     #[inline]
 557     pub fn doc_as_i32(d: Doc) -> i32 {
 558         doc_as_u32(d) as i32
 559     }
 560     #[inline]
 561     pub fn doc_as_i64(d: Doc) -> i64 {
 562         doc_as_u64(d) as i64
 563     }
 564
    /// Sequential reader over the children of a document.
    pub struct Decoder<'a> {
        // The document whose children are currently being read.
        parent: Doc<'a>,
        // Offset in `parent.data` of the next child to read.
        pos: usize,
    }
 569
 570     impl<'doc> Decoder<'doc> {
 571         pub fn new(d: Doc<'doc>) -> Decoder<'doc> {
 572             Decoder {
 573                 parent: d,
 574                 pos: d.start,
 575             }
 576         }
 577
 578         fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<Doc<'doc>> {
 579             debug!(". next_doc(exp_tag={:?})", exp_tag);
 580             if self.pos >= self.parent.end {
 581                 return Err(Expected(format!("no more documents in current node!")));
 582             }
 583             let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
 584             debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}",
 585                    self.parent.start,
 586                    self.parent.end,
 587                    self.pos,
 588                    r_tag,
 589                    r_doc.start,
 590                    r_doc.end);
 591             if r_tag != (exp_tag as usize) {
 592                 return Err(Expected(format!("expected EBML doc with tag {:?} but found tag {:?}",
 593                                             exp_tag,
 594                                             r_tag)));
 595             }
 596             if r_doc.end > self.parent.end {
 597                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 598                                              {:#x}",
 599                                             r_doc.end,
 600                                             self.parent.end)));
 601             }
 602             self.pos = r_doc.end;
 603             Ok(r_doc)
 604         }
 605
 606         fn push_doc<T, F>(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult<T>
 607             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 608         {
 609             let d = try!(self.next_doc(exp_tag));
 610             let old_parent = self.parent;
 611             let old_pos = self.pos;
 612             self.parent = d;
 613             self.pos = d.start;
 614             let r = try!(f(self));
 615             self.parent = old_parent;
 616             self.pos = old_pos;
 617             Ok(r)
 618         }
 619
 620         fn _next_sub(&mut self) -> DecodeResult<usize> {
 621             // empty vector/map optimization
 622             if self.parent.is_empty() {
 623                 return Ok(0);
 624             }
 625
 626             let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
 627             let r = if r_tag == (EsSub8 as usize) {
 628                 doc_as_u8(r_doc) as usize
 629             } else if r_tag == (EsSub32 as usize) {
 630                 doc_as_u32(r_doc) as usize
 631             } else {
 632                 return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but found \
 633                                              tag {:?}",
 634                                             EsSub8,
 635                                             EsSub32,
 636                                             r_tag)));
 637             };
 638             if r_doc.end > self.parent.end {
 639                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 640                                              {:#x}",
 641                                             r_doc.end,
 642                                             self.parent.end)));
 643             }
 644             self.pos = r_doc.end;
 645             debug!("_next_sub result={:?}", r);
 646             Ok(r)
 647         }
 648
 649         // variable-length unsigned integer with different tags.
 650         // `first_tag` should be a tag for u8 or i8.
 651         // `last_tag` should be the largest allowed integer tag with the matching signedness.
 652         // all tags between them should be valid, in the order of u8, u16, u32 and u64.
 653         fn _next_int(&mut self,
 654                      first_tag: EbmlEncoderTag,
 655                      last_tag: EbmlEncoderTag)
 656                      -> DecodeResult<u64> {
 657             if self.pos >= self.parent.end {
 658                 return Err(Expected(format!("no more documents in current node!")));
 659             }
 660
 661             let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
 662             let r = if first_tag as usize <= r_tag && r_tag <= last_tag as usize {
 663                 match r_tag - first_tag as usize {
 664                     0 => doc_as_u8(r_doc) as u64,
 665                     1 => doc_as_u16(r_doc) as u64,
 666                     2 => doc_as_u32(r_doc) as u64,
 667                     3 => doc_as_u64(r_doc),
 668                     _ => unreachable!(),
 669                 }
 670             } else {
 671                 return Err(Expected(format!("expected EBML doc with tag {:?} through {:?} but \
 672                                              found tag {:?}",
 673                                             first_tag,
 674                                             last_tag,
 675                                             r_tag)));
 676             };
 677             if r_doc.end > self.parent.end {
 678                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 679                                              {:#x}",
 680                                             r_doc.end,
 681                                             self.parent.end)));
 682             }
 683             self.pos = r_doc.end;
 684             debug!("_next_int({:?}, {:?}) result={:?}", first_tag, last_tag, r);
 685             Ok(r)
 686         }
 687
 688         pub fn read_opaque<R, F>(&mut self, op: F) -> DecodeResult<R>
 689             where F: FnOnce(&mut opaque::Decoder, Doc) -> DecodeResult<R>
 690         {
 691             let doc = try!(self.next_doc(EsOpaque));
 692
 693             let result = {
 694                 let mut opaque_decoder = opaque::Decoder::new(doc.data, doc.start);
 695                 try!(op(&mut opaque_decoder, doc))
 696             };
 697
 698             Ok(result)
 699         }
 700
 701         pub fn position(&self) -> usize {
 702             self.pos
 703         }
 704
 705         pub fn advance(&mut self, bytes: usize) {
 706             self.pos += bytes;
 707         }
 708     }
 709
 710     impl<'doc> serialize::Decoder for Decoder<'doc> {
        // Decoding failures are reported with this crate's `Error` type.
        type Error = Error;
        // Nil has no encoded representation: nothing is read and the position
        // does not move.
        fn read_nil(&mut self) -> DecodeResult<()> {
            Ok(())
        }
 715
 716         fn read_u64(&mut self) -> DecodeResult<u64> {
 717             self._next_int(EsU8, EsU64)
 718         }
 719         fn read_u32(&mut self) -> DecodeResult<u32> {
 720             Ok(try!(self._next_int(EsU8, EsU32)) as u32)
 721         }
 722         fn read_u16(&mut self) -> DecodeResult<u16> {
 723             Ok(try!(self._next_int(EsU8, EsU16)) as u16)
 724         }
 725         fn read_u8(&mut self) -> DecodeResult<u8> {
 726             Ok(doc_as_u8(try!(self.next_doc(EsU8))))
 727         }
 728         fn read_uint(&mut self) -> DecodeResult<usize> {
 729             let v = try!(self._next_int(EsU8, EsU64));
 730             if v > (::std::usize::MAX as u64) {
 731                 Err(IntTooBig(v as usize))
 732             } else {
 733                 Ok(v as usize)
 734             }
 735         }
 736
 737         fn read_i64(&mut self) -> DecodeResult<i64> {
 738             Ok(try!(self._next_int(EsI8, EsI64)) as i64)
 739         }
 740         fn read_i32(&mut self) -> DecodeResult<i32> {
 741             Ok(try!(self._next_int(EsI8, EsI32)) as i32)
 742         }
 743         fn read_i16(&mut self) -> DecodeResult<i16> {
 744             Ok(try!(self._next_int(EsI8, EsI16)) as i16)
 745         }
 746         fn read_i8(&mut self) -> DecodeResult<i8> {
 747             Ok(doc_as_u8(try!(self.next_doc(EsI8))) as i8)
 748         }
 749         fn read_int(&mut self) -> DecodeResult<isize> {
 750             let v = try!(self._next_int(EsI8, EsI64)) as i64;
 751             if v > (isize::MAX as i64) || v < (isize::MIN as i64) {
 752                 debug!("FIXME \\#6122: Removing this makes this function miscompile");
 753                 Err(IntTooBig(v as usize))
 754             } else {
 755                 Ok(v as isize)
 756             }
 757         }
 758
 759         fn read_bool(&mut self) -> DecodeResult<bool> {
 760             Ok(doc_as_u8(try!(self.next_doc(EsBool))) != 0)
 761         }
 762
 763         fn read_f64(&mut self) -> DecodeResult<f64> {
 764             let bits = doc_as_u64(try!(self.next_doc(EsF64)));
 765             Ok(unsafe { transmute(bits) })
 766         }
 767         fn read_f32(&mut self) -> DecodeResult<f32> {
 768             let bits = doc_as_u32(try!(self.next_doc(EsF32)));
 769             Ok(unsafe { transmute(bits) })
 770         }
 771         fn read_char(&mut self) -> DecodeResult<char> {
 772             Ok(char::from_u32(doc_as_u32(try!(self.next_doc(EsChar)))).unwrap())
 773         }
 774         fn read_str(&mut self) -> DecodeResult<String> {
 775             Ok(try!(self.next_doc(EsStr)).as_str())
 776         }
 777
 778         // Compound types:
 779         fn read_enum<T, F>(&mut self, name: &str, f: F) -> DecodeResult<T>
 780             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 781         {
 782             debug!("read_enum({})", name);
 783
 784             let doc = try!(self.next_doc(EsEnum));
 785
 786             let (old_parent, old_pos) = (self.parent, self.pos);
 787             self.parent = doc;
 788             self.pos = self.parent.start;
 789
 790             let result = try!(f(self));
 791
 792             self.parent = old_parent;
 793             self.pos = old_pos;
 794             Ok(result)
 795         }
 796
 797         fn read_enum_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
 798             where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 799         {
 800             debug!("read_enum_variant()");
 801             let idx = try!(self._next_sub());
 802             debug!("  idx={}", idx);
 803
 804             f(self, idx)
 805         }
 806
        /// Decodes one argument of an enum variant by calling `f` directly.
        ///
        /// `idx` is only logged; no enclosing document is opened here.
        fn read_enum_variant_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_enum_variant_arg(idx={})", idx);
            f(self)
        }
 813
        /// Reads the index of a struct-like variant with `_next_sub` and passes it,
        /// together with the decoder, to `f`.
        ///
        /// The list of variant names is ignored.
        fn read_enum_struct_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
            where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
        {
            debug!("read_enum_struct_variant()");
            let idx = try!(self._next_sub());
            debug!("  idx={}", idx);

            f(self, idx)
        }
 823
 824         fn read_enum_struct_variant_field<T, F>(&mut self,
 825                                                 name: &str,
 826                                                 idx: usize,
 827                                                 f: F)
 828                                                 -> DecodeResult<T>
 829             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 830         {
 831             debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx);
 832             f(self)
 833         }
 834
        /// Decodes a struct by calling `f` directly.
        ///
        /// `name` is only logged and the field count is ignored; no enclosing
        /// document is opened here.
        fn read_struct<T, F>(&mut self, name: &str, _: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_struct(name={})", name);
            f(self)
        }
 841
        /// Decodes one struct field by calling `f` directly.
        ///
        /// `name` and `idx` are only logged.
        fn read_struct_field<T, F>(&mut self, name: &str, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_struct_field(name={}, idx={})", name, idx);
            f(self)
        }
 848
 849         fn read_tuple<T, F>(&mut self, tuple_len: usize, f: F) -> DecodeResult<T>
 850             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 851         {
 852             debug!("read_tuple()");
 853             self.read_seq(move |d, len| {
 854                 if len == tuple_len {
 855                     f(d)
 856                 } else {
 857                     Err(Expected(format!("Expected tuple of length `{}`, found tuple of length \
 858                                           `{}`",
 859                                          tuple_len,
 860                                          len)))
 861                 }
 862             })
 863         }
 864
        /// Decodes one tuple element; delegates to `read_seq_elt`.
        fn read_tuple_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_tuple_arg(idx={})", idx);
            self.read_seq_elt(idx, f)
        }
 871
        /// Decodes a tuple struct; delegates to `read_tuple` with the expected length.
        ///
        /// `name` is only logged.
        fn read_tuple_struct<T, F>(&mut self, name: &str, len: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_tuple_struct(name={})", name);
            self.read_tuple(len, f)
        }
 878
        /// Decodes one tuple-struct element; delegates to `read_tuple_arg`.
        fn read_tuple_struct_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_tuple_struct_arg(idx={})", idx);
            self.read_tuple_arg(idx, f)
        }
 885
 886         fn read_option<T, F>(&mut self, mut f: F) -> DecodeResult<T>
 887             where F: FnMut(&mut Decoder<'doc>, bool) -> DecodeResult<T>
 888         {
 889             debug!("read_option()");
 890             self.read_enum("Option", move |this| {
 891                 this.read_enum_variant(&["None", "Some"], move |this, idx| {
 892                     match idx {
 893                         0 => f(this, false),
 894                         1 => f(this, true),
 895                         _ => Err(Expected(format!("Expected None or Some"))),
 896                     }
 897                 })
 898             })
 899         }
 900
        /// Decodes a sequence: enters the next `EsVec` document via `push_doc`, reads
        /// the element count with `_next_sub` and calls `f` with the decoder and that
        /// count.
        fn read_seq<T, F>(&mut self, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
        {
            debug!("read_seq()");
            self.push_doc(EsVec, move |d| {
                let len = try!(d._next_sub());
                debug!("  len={}", len);
                f(d, len)
            })
        }
 911
        /// Decodes one sequence element by running `f` inside the next `EsVecElt`
        /// document.
        ///
        /// `idx` is only logged.
        fn read_seq_elt<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_seq_elt(idx={})", idx);
            self.push_doc(EsVecElt, f)
        }
 918
        /// Decodes a map: enters the next `EsMap` document via `push_doc`, reads the
        /// entry count with `_next_sub` and calls `f` with the decoder and that count.
        fn read_map<T, F>(&mut self, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
        {
            debug!("read_map()");
            self.push_doc(EsMap, move |d| {
                let len = try!(d._next_sub());
                debug!("  len={}", len);
                f(d, len)
            })
        }
 929
        /// Decodes one map key by running `f` inside the next `EsMapKey` document.
        ///
        /// `idx` is only logged.
        fn read_map_elt_key<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_map_elt_key(idx={})", idx);
            self.push_doc(EsMapKey, f)
        }
 936
        /// Decodes one map value by running `f` inside the next `EsMapVal` document.
        ///
        /// `idx` is only logged.
        fn read_map_elt_val<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_map_elt_val(idx={})", idx);
            self.push_doc(EsMapVal, f)
        }
 943
 944         fn error(&mut self, err: &str) -> Error {
 945             ApplicationError(err.to_string())
 946         }
 947     }
 948 }
 949
 950 pub mod writer {
 951     use std::mem;
 952     use std::io::prelude::*;
 953     use std::io::{self, SeekFrom, Cursor};
 954
 955     use super::opaque;
 956     use super::{EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16,
 957                 EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
 958                 EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS};
 959
 960     use serialize;
 961
 962
 963     pub type EncodeResult = io::Result<()>;
 964
    /// rbml writing: emits tagged documents into a seekable in-memory buffer.
    pub struct Encoder<'a> {
        /// Destination buffer; it is seeked backwards to patch tag sizes in `end_tag`.
        pub writer: &'a mut Cursor<Vec<u8>>,
        // Positions of the size placeholders of the currently open tags
        // (pushed by `start_tag`, popped by `end_tag`).
        size_positions: Vec<u64>,
        relax_limit: u64, // do not move encoded bytes before this position
    }
 971
 972     fn write_tag<W: Write>(w: &mut W, n: usize) -> EncodeResult {
 973         if n < 0xf0 {
 974             w.write_all(&[n as u8])
 975         } else if 0x100 <= n && n < NUM_TAGS {
 976             w.write_all(&[0xf0 | (n >> 8) as u8, n as u8])
 977         } else {
 978             Err(io::Error::new(io::ErrorKind::Other, &format!("invalid tag: {}", n)[..]))
 979         }
 980     }
 981
 982     fn write_sized_vuint<W: Write>(w: &mut W, n: usize, size: usize) -> EncodeResult {
 983         match size {
 984             1 => w.write_all(&[0x80 | (n as u8)]),
 985             2 => w.write_all(&[0x40 | ((n >> 8) as u8), n as u8]),
 986             3 => w.write_all(&[0x20 | ((n >> 16) as u8), (n >> 8) as u8, n as u8]),
 987             4 => w.write_all(&[0x10 | ((n >> 24) as u8), (n >> 16) as u8, (n >> 8) as u8, n as u8]),
 988             _ => Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..])),
 989         }
 990     }
 991
 992     pub fn write_vuint<W: Write>(w: &mut W, n: usize) -> EncodeResult {
 993         if n < 0x7f {
 994             return write_sized_vuint(w, n, 1);
 995         }
 996         if n < 0x4000 {
 997             return write_sized_vuint(w, n, 2);
 998         }
 999         if n < 0x200000 {
1000             return write_sized_vuint(w, n, 3);
1001         }
1002         if n < 0x10000000 {
1003             return write_sized_vuint(w, n, 4);
1004         }
1005         Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..]))
1006     }
1007
    /// Public, tag-level writing API.
    impl<'a> Encoder<'a> {
        /// Creates an encoder that writes into `w`, with no open tags and no stable
        /// position marked yet.
        pub fn new(w: &'a mut Cursor<Vec<u8>>) -> Encoder<'a> {
            Encoder {
                writer: w,
                size_positions: vec![],
                relax_limit: 0,
            }
        }

        /// Opens a tag: writes `tag_id` followed by a four-byte size placeholder whose
        /// position is remembered so that `end_tag` can patch it.
        ///
        /// Panics if `tag_id` is below `NUM_IMPLICIT_TAGS`.
        pub fn start_tag(&mut self, tag_id: usize) -> EncodeResult {
            debug!("Start tag {:?}", tag_id);
            assert!(tag_id >= NUM_IMPLICIT_TAGS);

            // Write the enum ID:
            try!(write_tag(self.writer, tag_id));

            // Write a placeholder four-byte size.
            let cur_pos = try!(self.writer.seek(SeekFrom::Current(0)));
            self.size_positions.push(cur_pos);
            let zeroes: &[u8] = &[0, 0, 0, 0];
            self.writer.write_all(zeroes)
        }

        /// Closes the most recently opened tag by filling in its size.
        ///
        /// For small payloads (at most `RELAX_MAX_SIZE` bytes) whose size field does
        /// not lie before `relax_limit`, the size is rewritten in its shortest form
        /// and the payload is moved back to follow it directly. Otherwise the
        /// four-byte placeholder is overwritten with an overlong size and the cursor
        /// returns to the end of the payload.
        ///
        /// Panics if there is no open tag.
        pub fn end_tag(&mut self) -> EncodeResult {
            let last_size_pos = self.size_positions.pop().unwrap();
            let cur_pos = try!(self.writer.seek(SeekFrom::Current(0)));
            try!(self.writer.seek(SeekFrom::Start(last_size_pos)));
            // payload length: everything written after the four placeholder bytes
            let size = (cur_pos - last_size_pos - 4) as usize;

            // relax the size encoding for small tags (bigger tags are costly to move).
            // we should never try to move the stable positions, however.
            const RELAX_MAX_SIZE: usize = 0x100;
            if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit {
                // we can't alter the buffer in place, so have a temporary buffer
                let mut buf = [0u8; RELAX_MAX_SIZE];
                {
                    let last_size_pos = last_size_pos as usize;
                    let data = &self.writer.get_ref()[last_size_pos + 4..cur_pos as usize];
                    buf[..size].clone_from_slice(data);
                }

                // overwrite the size and data and continue
                try!(write_vuint(self.writer, size));
                try!(self.writer.write_all(&buf[..size]));
            } else {
                // overwrite the size with an overlong encoding and skip past the data
                try!(write_sized_vuint(self.writer, size, 4));
                try!(self.writer.seek(SeekFrom::Start(cur_pos)));
            }

            debug!("End tag (size = {:?})", size);
            Ok(())
        }

        /// Writes a complete tag: opens `tag_id`, runs `blk` to produce the contents,
        /// then closes the tag. An error from `blk` is returned without closing it.
        pub fn wr_tag<F>(&mut self, tag_id: usize, blk: F) -> EncodeResult
            where F: FnOnce() -> EncodeResult
        {
            try!(self.start_tag(tag_id));
            try!(blk());
            self.end_tag()
        }

        /// Writes `tag_id`, the length of `b` as a vuint, and then `b` itself.
        ///
        /// Panics if `tag_id` is below `NUM_IMPLICIT_TAGS`.
        pub fn wr_tagged_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
            assert!(tag_id >= NUM_IMPLICIT_TAGS);
            try!(write_tag(self.writer, tag_id));
            try!(write_vuint(self.writer, b.len()));
            self.writer.write_all(b)
        }

        /// Writes `v` as a length-prefixed, big-endian integer with leading zero bytes
        /// removed; a value of zero therefore has an empty payload.
        pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
            let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
            // tagged integers are emitted in big-endian, with no
            // leading zeros.
            let leading_zero_bytes = v.leading_zeros() / 8;
            self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..])
        }

        /// Same encoding as `wr_tagged_u64`, for a `u32` value.
        #[inline]
        pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
            self.wr_tagged_u64(tag_id, v as u64)
        }

        /// Same encoding as `wr_tagged_u64`, for a `u16` value.
        #[inline]
        pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
            self.wr_tagged_u64(tag_id, v as u64)
        }

        /// Writes `v` as a length-prefixed single byte (always one byte, even for zero).
        #[inline]
        pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
            self.wr_tagged_bytes(tag_id, &[v])
        }

        /// Writes `v` reinterpreted as `u64` via `wr_tagged_u64`.
        #[inline]
        pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
            self.wr_tagged_u64(tag_id, v as u64)
        }

        /// Writes `v` reinterpreted as `u32` via `wr_tagged_u32`.
        #[inline]
        pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
            self.wr_tagged_u32(tag_id, v as u32)
        }

        /// Writes `v` reinterpreted as `u16` via `wr_tagged_u16`.
        #[inline]
        pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
            self.wr_tagged_u16(tag_id, v as u16)
        }

        /// Writes `v` reinterpreted as `u8`, as a length-prefixed single byte.
        #[inline]
        pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
            self.wr_tagged_bytes(tag_id, &[v as u8])
        }

        /// Writes the UTF-8 bytes of `v` as a length-prefixed tagged document.
        pub fn wr_tagged_str(&mut self, tag_id: usize, v: &str) -> EncodeResult {
            self.wr_tagged_bytes(tag_id, v.as_bytes())
        }

        // for auto-serialization
        /// Writes `tag_id` followed directly by `b`, with no length in between.
        fn wr_tagged_raw_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
            try!(write_tag(self.writer, tag_id));
            self.writer.write_all(b)
        }

        /// Writes `tag_id` followed by all eight big-endian bytes of `v`.
        fn wr_tagged_raw_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
            let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
            self.wr_tagged_raw_bytes(tag_id, &bytes)
        }

        /// Writes `tag_id` followed by all four big-endian bytes of `v`.
        fn wr_tagged_raw_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
            let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) };
            self.wr_tagged_raw_bytes(tag_id, &bytes)
        }

        /// Writes `tag_id` followed by both big-endian bytes of `v`.
        fn wr_tagged_raw_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
            let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) };
            self.wr_tagged_raw_bytes(tag_id, &bytes)
        }

        /// Writes `tag_id` followed by the single byte `v`.
        fn wr_tagged_raw_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
            self.wr_tagged_raw_bytes(tag_id, &[v])
        }

        /// Writes `v` reinterpreted as `u64` via `wr_tagged_raw_u64`.
        fn wr_tagged_raw_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
            self.wr_tagged_raw_u64(tag_id, v as u64)
        }

        /// Writes `v` reinterpreted as `u32` via `wr_tagged_raw_u32`.
        fn wr_tagged_raw_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
            self.wr_tagged_raw_u32(tag_id, v as u32)
        }

        /// Writes `v` reinterpreted as `u16` via `wr_tagged_raw_u16`.
        fn wr_tagged_raw_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
            self.wr_tagged_raw_u16(tag_id, v as u16)
        }

        /// Writes `tag_id` followed by `v` reinterpreted as a single `u8` byte.
        fn wr_tagged_raw_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
            self.wr_tagged_raw_bytes(tag_id, &[v as u8])
        }

        /// Writes `b` verbatim, without any tag or length.
        pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult {
            debug!("Write {:?} bytes", b.len());
            self.writer.write_all(b)
        }

        /// Writes the UTF-8 bytes of `s` verbatim, without any tag or length.
        pub fn wr_str(&mut self, s: &str) -> EncodeResult {
            debug!("Write str: {:?}", s);
            self.writer.write_all(s.as_bytes())
        }

        /// Returns the current position while marking it stable, i.e.
        /// generated bytes so far wouldn't be affected by relaxation.
        pub fn mark_stable_position(&mut self) -> u64 {
            let pos = self.writer.seek(SeekFrom::Current(0)).unwrap();
            // the limit only ever moves forward
            if self.relax_limit < pos {
                self.relax_limit = pos;
            }
            pos
        }
    }
1185
1186     impl<'a> Encoder<'a> {
1187         // used internally to emit things like the vector length and so on
1188         fn _emit_tagged_sub(&mut self, v: usize) -> EncodeResult {
1189             if v as u8 as usize == v {
1190                 self.wr_tagged_raw_u8(EsSub8 as usize, v as u8)
1191             } else if v as u32 as usize == v {
1192                 self.wr_tagged_raw_u32(EsSub32 as usize, v as u32)
1193             } else {
1194                 Err(io::Error::new(io::ErrorKind::Other,
1195                                    &format!("length or variant id too big: {}", v)[..]))
1196             }
1197         }
1198
1199         pub fn emit_opaque<F>(&mut self, f: F) -> EncodeResult
1200             where F: FnOnce(&mut opaque::Encoder) -> EncodeResult
1201         {
1202             try!(self.start_tag(EsOpaque as usize));
1203
1204             {
1205                 let mut opaque_encoder = opaque::Encoder::new(self.writer);
1206                 try!(f(&mut opaque_encoder));
1207             }
1208
1209             self.mark_stable_position();
1210             self.end_tag()
1211         }
1212     }
1213
    /// Auto-serialization support: maps the generic `serialize::Encoder` callbacks
    /// onto rbml tags.
    impl<'a> serialize::Encoder for Encoder<'a> {
        type Error = io::Error;

        /// Nil has no encoding; nothing is written.
        fn emit_nil(&mut self) -> EncodeResult {
            Ok(())
        }

        // Unsigned integers: each width falls through to the next smaller one whenever
        // the value survives the narrowing cast, so the smallest fitting tag is used.
        fn emit_uint(&mut self, v: usize) -> EncodeResult {
            self.emit_u64(v as u64)
        }
        fn emit_u64(&mut self, v: u64) -> EncodeResult {
            if v as u32 as u64 == v {
                self.emit_u32(v as u32)
            } else {
                self.wr_tagged_raw_u64(EsU64 as usize, v)
            }
        }
        fn emit_u32(&mut self, v: u32) -> EncodeResult {
            if v as u16 as u32 == v {
                self.emit_u16(v as u16)
            } else {
                self.wr_tagged_raw_u32(EsU32 as usize, v)
            }
        }
        fn emit_u16(&mut self, v: u16) -> EncodeResult {
            if v as u8 as u16 == v {
                self.emit_u8(v as u8)
            } else {
                self.wr_tagged_raw_u16(EsU16 as usize, v)
            }
        }
        fn emit_u8(&mut self, v: u8) -> EncodeResult {
            self.wr_tagged_raw_u8(EsU8 as usize, v)
        }

        // Signed integers: same narrowing scheme as the unsigned ones above.
        fn emit_int(&mut self, v: isize) -> EncodeResult {
            self.emit_i64(v as i64)
        }
        fn emit_i64(&mut self, v: i64) -> EncodeResult {
            if v as i32 as i64 == v {
                self.emit_i32(v as i32)
            } else {
                self.wr_tagged_raw_i64(EsI64 as usize, v)
            }
        }
        fn emit_i32(&mut self, v: i32) -> EncodeResult {
            if v as i16 as i32 == v {
                self.emit_i16(v as i16)
            } else {
                self.wr_tagged_raw_i32(EsI32 as usize, v)
            }
        }
        fn emit_i16(&mut self, v: i16) -> EncodeResult {
            if v as i8 as i16 == v {
                self.emit_i8(v as i8)
            } else {
                self.wr_tagged_raw_i16(EsI16 as usize, v)
            }
        }
        fn emit_i8(&mut self, v: i8) -> EncodeResult {
            self.wr_tagged_raw_i8(EsI8 as usize, v)
        }

        /// Booleans are written as one raw byte (`v as u8`) under `EsBool`.
        fn emit_bool(&mut self, v: bool) -> EncodeResult {
            self.wr_tagged_raw_u8(EsBool as usize, v as u8)
        }

        // Floats are written as the raw bit pattern of the value.
        fn emit_f64(&mut self, v: f64) -> EncodeResult {
            let bits = unsafe { mem::transmute(v) };
            self.wr_tagged_raw_u64(EsF64 as usize, bits)
        }
        fn emit_f32(&mut self, v: f32) -> EncodeResult {
            let bits = unsafe { mem::transmute(v) };
            self.wr_tagged_raw_u32(EsF32 as usize, bits)
        }
        /// Chars are written as their `u32` value under `EsChar`.
        fn emit_char(&mut self, v: char) -> EncodeResult {
            self.wr_tagged_raw_u32(EsChar as usize, v as u32)
        }

        /// Strings are written length-prefixed under `EsStr`.
        fn emit_str(&mut self, v: &str) -> EncodeResult {
            self.wr_tagged_str(EsStr as usize, v)
        }

        /// Wraps whatever `f` writes in an `EsEnum` tag; the enum name is not stored.
        fn emit_enum<F>(&mut self, _name: &str, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            try!(self.start_tag(EsEnum as usize));
            try!(f(self));
            self.end_tag()
        }

        /// Writes the variant id with `_emit_tagged_sub`, then lets `f` write the
        /// arguments. The variant name and argument count are not stored.
        fn emit_enum_variant<F>(&mut self, _: &str, v_id: usize, _: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            try!(self._emit_tagged_sub(v_id));
            f(self)
        }

        /// Variant arguments are written back to back, without their own wrapper.
        fn emit_enum_variant_arg<F>(&mut self, _: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            f(self)
        }

        /// Struct-like variants are encoded exactly like tuple-like ones.
        fn emit_enum_struct_variant<F>(&mut self,
                                       v_name: &str,
                                       v_id: usize,
                                       cnt: usize,
                                       f: F)
                                       -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_enum_variant(v_name, v_id, cnt, f)
        }

        /// Fields of struct-like variants are encoded like variant arguments.
        fn emit_enum_struct_variant_field<F>(&mut self, _: &str, idx: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_enum_variant_arg(idx, f)
        }

        /// Structs get no wrapper of their own: `f` writes the fields directly.
        fn emit_struct<F>(&mut self, _: &str, _len: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            f(self)
        }

        /// Struct fields get no wrapper either; name and index are not stored.
        fn emit_struct_field<F>(&mut self, _name: &str, _: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            f(self)
        }

        // Tuples and tuple structs are encoded as sequences.
        fn emit_tuple<F>(&mut self, len: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_seq(len, f)
        }
        fn emit_tuple_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_seq_elt(idx, f)
        }

        fn emit_tuple_struct<F>(&mut self, _: &str, len: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_seq(len, f)
        }
        fn emit_tuple_struct_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_seq_elt(idx, f)
        }

        // Options are encoded as an enum with variant 0 = None and variant 1 = Some.
        fn emit_option<F>(&mut self, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            self.emit_enum("Option", f)
        }
        fn emit_option_none(&mut self) -> EncodeResult {
            self.emit_enum_variant("None", 0, 0, |_| Ok(()))
        }
        fn emit_option_some<F>(&mut self, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {

            self.emit_enum_variant("Some", 1, 1, f)
        }

        /// Writes a sequence as an `EsVec` tag holding the length followed by whatever
        /// `f` writes. An empty sequence is written as an empty `EsVec` document and
        /// `f` is not called.
        fn emit_seq<F>(&mut self, len: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            if len == 0 {
                // empty vector optimization
                return self.wr_tagged_bytes(EsVec as usize, &[]);
            }

            try!(self.start_tag(EsVec as usize));
            try!(self._emit_tagged_sub(len));
            try!(f(self));
            self.end_tag()
        }

        /// Wraps one sequence element in an `EsVecElt` tag; the index is not stored.
        fn emit_seq_elt<F>(&mut self, _idx: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {

            try!(self.start_tag(EsVecElt as usize));
            try!(f(self));
            self.end_tag()
        }

        /// Writes a map as an `EsMap` tag holding the entry count followed by whatever
        /// `f` writes. An empty map is written as an empty `EsMap` document and `f` is
        /// not called.
        fn emit_map<F>(&mut self, len: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            if len == 0 {
                // empty map optimization
                return self.wr_tagged_bytes(EsMap as usize, &[]);
            }

            try!(self.start_tag(EsMap as usize));
            try!(self._emit_tagged_sub(len));
            try!(f(self));
            self.end_tag()
        }

        /// Wraps one map key in an `EsMapKey` tag; the index is not stored.
        fn emit_map_elt_key<F>(&mut self, _idx: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {

            try!(self.start_tag(EsMapKey as usize));
            try!(f(self));
            self.end_tag()
        }

        /// Wraps one map value in an `EsMapVal` tag; the index is not stored.
        fn emit_map_elt_val<F>(&mut self, _idx: usize, f: F) -> EncodeResult
            where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
        {
            try!(self.start_tag(EsMapVal as usize));
            try!(f(self));
            self.end_tag()
        }
    }
1438 }
1439
1440 // ___________________________________________________________________________
1441 // Testing
1442
#[cfg(test)]
mod tests {
    use super::{Doc, reader, writer};

    use serialize::{Encodable, Decodable};

    use std::io::Cursor;

    /// Decodes the smallest and largest bit pattern of each vuint width, one after
    /// the other, and checks both the decoded value and the position of the next read.
    #[test]
    fn test_vuint_at() {
        let data = &[
            0x80,
            0xff,
            0x40, 0x00,
            0x7f, 0xff,
            0x20, 0x00, 0x00,
            0x3f, 0xff, 0xff,
            0x10, 0x00, 0x00, 0x00,
            0x1f, 0xff, 0xff, 0xff
        ];

        let mut res: reader::Res;

        // Class A
        res = reader::vuint_at(data, 0).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 1);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 7) - 1);
        assert_eq!(res.next, 2);

        // Class B
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 4);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 14) - 1);
        assert_eq!(res.next, 6);

        // Class C
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 9);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 21) - 1);
        assert_eq!(res.next, 12);

        // Class D
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 16);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 28) - 1);
        assert_eq!(res.next, 20);
    }

    /// Round-trips a few `Option<isize>` values through the encoder and the decoder.
    #[test]
    fn test_option_int() {
        fn test_v(v: Option<isize>) {
            debug!("v == {:?}", v);
            let mut wr = Cursor::new(Vec::new());
            {
                let mut rbml_w = writer::Encoder::new(&mut wr);
                // Fail right here if encoding goes wrong, rather than later with a
                // confusing decode error.
                v.encode(&mut rbml_w).unwrap();
            }
            let rbml_doc = Doc::new(wr.get_ref());
            let mut deser = reader::Decoder::new(rbml_doc);
            let v1 = Decodable::decode(&mut deser).unwrap();
            debug!("v1 == {:?}", v1);
            assert_eq!(v, v1);
        }

        test_v(Some(22));
        test_v(None);
        test_v(Some(3));
    }
}
1520
// Micro-benchmarks for `reader::vuint_at`. Each one builds a byte buffer with a
// width marker at every fourth offset and decodes at those offsets only.
// "A" and "D" match the one-byte and four-byte classes in `test_vuint_at`.
// NOTE(review): "aligned"/"unaligned" presumably refers to whether the read offsets
// are multiples of four (start 0 vs. start 1) - confirm.
// NOTE(review): `sum` is accumulated but never read afterwards, so the optimizer may
// be free to drop part of the measured work - confirm this is intended.
#[cfg(test)]
mod bench {
    #![allow(non_snake_case)]
    use test::Bencher;
    use super::reader;

    /// One-byte form, read at offsets 0, 4, 8, ...
    #[bench]
    pub fn vuint_at_A_aligned(b: &mut Bencher) {
        // 400 bytes: 0x80 at every even index, the (truncated) index elsewhere
        let data = (0..4 * 100)
                       .map(|i| {
                           match i % 2 {
                               0 => 0x80,
                               _ => i as u8,
                           }
                       })
                       .collect::<Vec<_>>();
        let mut sum = 0;
        b.iter(|| {
            let mut i = 0;
            while i < data.len() {
                sum += reader::vuint_at(&data, i).unwrap().val;
                i += 4;
            }
        });
    }

    /// One-byte form, read at offsets 1, 5, 9, ...
    #[bench]
    pub fn vuint_at_A_unaligned(b: &mut Bencher) {
        // 401 bytes: 0x80 at every odd index, the (truncated) index elsewhere
        let data = (0..4 * 100 + 1)
                       .map(|i| {
                           match i % 2 {
                               1 => 0x80,
                               _ => i as u8,
                           }
                       })
                       .collect::<Vec<_>>();
        let mut sum = 0;
        b.iter(|| {
            let mut i = 1;
            while i < data.len() {
                sum += reader::vuint_at(&data, i).unwrap().val;
                i += 4;
            }
        });
    }

    /// Four-byte form, read at offsets 0, 4, 8, ...
    #[bench]
    pub fn vuint_at_D_aligned(b: &mut Bencher) {
        // 400 bytes in groups of four: marker 0x10, two zero bytes, then the
        // (truncated) index as the lowest byte
        let data = (0..4 * 100)
                       .map(|i| {
                           match i % 4 {
                               0 => 0x10,
                               3 => i as u8,
                               _ => 0,
                           }
                       })
                       .collect::<Vec<_>>();
        let mut sum = 0;
        b.iter(|| {
            let mut i = 0;
            while i < data.len() {
                sum += reader::vuint_at(&data, i).unwrap().val;
                i += 4;
            }
        });
    }

    /// Four-byte form, read at offsets 1, 5, 9, ...
    #[bench]
    pub fn vuint_at_D_unaligned(b: &mut Bencher) {
        // 401 bytes: same groups of four as above, shifted by one byte
        let data = (0..4 * 100 + 1)
                       .map(|i| {
                           match i % 4 {
                               1 => 0x10,
                               0 => i as u8,
                               _ => 0,
                           }
                       })
                       .collect::<Vec<_>>();
        let mut sum = 0;
        b.iter(|| {
            let mut i = 1;
            while i < data.len() {
                sum += reader::vuint_at(&data, i).unwrap().val;
                i += 4;
            }
        });
    }
}