src/librbml/lib.rs

   1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! Really Bad Markup Language (rbml) is an internal serialization format of rustc.
  12 //! This is not intended to be used by users.
  13 //!
  14 //! Originally based on the Extensible Binary Markup Language
  15 //! (ebml; http://www.matroska.org/technical/specs/rfc/index.html),
  16 //! it is now a separate format tuned for the rust object metadata.
  17 //!
  18 //! # Encoding
  19 //!
  20 //! RBML document consists of the tag, length and data.
  21 //! The encoded data can contain multiple RBML documents concatenated.
  22 //!
  23 //! **Tags** are a hint for the following data.
  24 //! Tags are a number from 0x000 to 0xfff, where 0xf0 through 0xff is reserved.
  25 //! Tags less than 0xf0 are encoded in one literal byte.
  26 //! Tags greater than 0xff are encoded in two big-endian bytes,
  27 //! where the tag number is ORed with 0xf000. (E.g. tag 0x123 = `f1 23`)
  28 //!
  29 //! **Lengths** encode the length of the following data.
  30 //! It is a variable-length unsigned integer, in one of the following forms:
  31 //!
  32 //! - `80` through `fe` for lengths up to 0x7e;
  33 //! - `40 7f` through `7f ff` for lengths up to 0x3fff;
  34 //! - `20 40 00` through `3f ff ff` for lengths up to 0x1fffff;
  35 //! - `10 20 00 00` through `1f ff ff ff` for lengths up to 0xfffffff.
  36 //!
  37 //! The "overlong" form is allowed so that the length can be encoded
  38 //! without the prior knowledge of the encoded data.
  39 //! For example, the length 0 can be represented either by `80`, `40 00`,
  40 //! `20 00 00` or `10 00 00 00`.
  41 //! The encoder tries to minimize the length if possible.
  42 //! Also, some predefined tags listed below are so commonly used that
  43 //! their lengths are omitted ("implicit length").
  44 //!
  45 //! **Data** can be either binary bytes or zero or more nested RBML documents.
  46 //! Nested documents cannot overflow, and should be entirely contained
  47 //! within a parent document.
  48 //!
  49 //! # Predefined Tags
  50 //!
  51 //! Most RBML tags are defined by the application.
  52 //! (For the rust object metadata, see also `rustc::metadata::common`.)
  53 //! RBML itself does define a set of predefined tags however,
  54 //! intended for the auto-serialization implementation.
  55 //!
  56 //! Predefined tags with an implicit length:
  57 //!
  58 //! - `U8`  (`00`): 1-byte unsigned integer.
  59 //! - `U16` (`01`): 2-byte big endian unsigned integer.
  60 //! - `U32` (`02`): 4-byte big endian unsigned integer.
  61 //! - `U64` (`03`): 8-byte big endian unsigned integer.
  62 //!   Any of `U*` tags can be used to encode primitive unsigned integer types,
  63 //!   as long as it is no greater than the actual size.
  64 //!   For example, `u8` can only be represented via the `U8` tag.
  65 //!
  66 //! - `I8`  (`04`): 1-byte signed integer.
  67 //! - `I16` (`05`): 2-byte big endian signed integer.
  68 //! - `I32` (`06`): 4-byte big endian signed integer.
  69 //! - `I64` (`07`): 8-byte big endian signed integer.
  70 //!   Similar to `U*` tags. Always uses two's complement encoding.
  71 //!
  72 //! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true.
  73 //!
  74 //! - `Char` (`09`): 4-byte big endian Unicode scalar value.
  75 //!   Surrogate pairs or out-of-bound values are invalid.
  76 //!
  77 //! - `F32` (`0a`): 4-byte big endian unsigned integer representing
  78 //!   IEEE 754 binary32 floating-point format.
  79 //! - `F64` (`0b`): 8-byte big endian unsigned integer representing
  80 //!   IEEE 754 binary64 floating-point format.
  81 //!
  82 //! - `Sub8`  (`0c`): 1-byte unsigned integer for supplementary information.
  83 //! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information.
  84 //!   Those two tags normally occur as the first subdocument of certain tags,
  85 //!   namely `Enum`, `Vec` and `Map`, to provide a variant or size information.
  86 //!   They can be used interchangeably.
  87 //!
  88 //! Predefined tags with an explicit length:
  89 //!
  90 //! - `Str` (`10`): A UTF-8-encoded string.
  91 //!
  92 //! - `Enum` (`11`): An enum.
  93 //!   The first subdocument should be `Sub*` tags with a variant ID.
  94 //!   Subsequent subdocuments, if any, encode variant arguments.
  95 //!
  96 //! - `Vec` (`12`): A vector (sequence).
  97 //! - `VecElt` (`13`): A vector element.
  98 //!   The first subdocument should be `Sub*` tags with the number of elements.
  99 //!   Subsequent subdocuments should be `VecElt` tag per each element.
 100 //!
 101 //! - `Map` (`14`): A map (associative array).
 102 //! - `MapKey` (`15`): A key part of the map entry.
 103 //! - `MapVal` (`16`): A value part of the map entry.
 104 //!   The first subdocument should be `Sub*` tags with the number of entries.
 105 //!   Subsequent subdocuments should be an alternating sequence of
 106 //!   `MapKey` and `MapVal` tags per each entry.
 107 //!
 108 //! - `Opaque` (`17`): An opaque, custom-format tag.
 109 //!   Used to wrap ordinary custom tags or data in the auto-serialized context.
 110 //!   Rustc typically uses this to encode type information.
 111 //!
 112 //! First 0x20 tags are reserved by RBML; custom tags start at 0x20.
 113
 114 #![crate_name = "rbml"]
 115 #![unstable(feature = "rustc_private", issue = "27812")]
 116 #![crate_type = "rlib"]
 117 #![crate_type = "dylib"]
 118 #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
 119        html_favicon_url = "https://doc.rust-lang.org/favicon.ico",
 120        html_root_url = "https://doc.rust-lang.org/nightly/",
 121        html_playground_url = "https://play.rust-lang.org/",
 122        test(attr(deny(warnings))))]
 123
 124 #![feature(rustc_private)]
 125 #![feature(staged_api)]
 126
 127 #![cfg_attr(test, feature(test))]
 128
 129 extern crate serialize;
 130
 131 #[cfg(test)]
 132 extern crate serialize as rustc_serialize; // Used by RustcEncodable
 133
 134 #[macro_use]
 135 extern crate log;
 136
 137 #[cfg(test)]
 138 extern crate test;
 139
 140 pub mod opaque;
 141 pub mod leb128;
 142
 143 pub use self::EbmlEncoderTag::*;
 144 pub use self::Error::*;
 145
 146 use std::str;
 147 use std::fmt;
 148
 149 /// Common data structures
 150 #[derive(Clone, Copy)]
 151 pub struct Doc<'a> {
 152     pub data: &'a [u8],
 153     pub start: usize,
 154     pub end: usize,
 155 }
 156
 157 impl<'doc> Doc<'doc> {
 158     pub fn new(data: &'doc [u8]) -> Doc<'doc> {
 159         Doc {
 160             data: data,
 161             start: 0,
 162             end: data.len(),
 163         }
 164     }
 165
 166     pub fn get<'a>(&'a self, tag: usize) -> Doc<'a> {
 167         reader::get_doc(*self, tag)
 168     }
 169
 170     pub fn is_empty(&self) -> bool {
 171         self.start == self.end
 172     }
 173
 174     pub fn as_str_slice<'a>(&'a self) -> &'a str {
 175         str::from_utf8(&self.data[self.start..self.end]).unwrap()
 176     }
 177
 178     pub fn as_str(&self) -> String {
 179         self.as_str_slice().to_string()
 180     }
 181 }
 182
 183 pub struct TaggedDoc<'a> {
 184     tag: usize,
 185     pub doc: Doc<'a>,
 186 }
 187
 188 #[derive(Copy, Clone, Debug)]
 189 pub enum EbmlEncoderTag {
 190     // tags 00..1f are reserved for auto-serialization.
 191     // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded.
 192     EsU8 = 0x00, // + 1 byte
 193     EsU16 = 0x01, // + 2 bytes
 194     EsU32 = 0x02, // + 4 bytes
 195     EsU64 = 0x03, // + 8 bytes
 196     EsI8 = 0x04, // + 1 byte
 197     EsI16 = 0x05, // + 2 bytes
 198     EsI32 = 0x06, // + 4 bytes
 199     EsI64 = 0x07, // + 8 bytes
 200     EsBool = 0x08, // + 1 byte
 201     EsChar = 0x09, // + 4 bytes
 202     EsF32 = 0x0a, // + 4 bytes
 203     EsF64 = 0x0b, // + 8 bytes
 204     EsSub8 = 0x0c, // + 1 byte
 205     EsSub32 = 0x0d, // + 4 bytes
 206     // 0x0e and 0x0f are reserved
 207     EsStr = 0x10,
 208     EsEnum = 0x11, // encodes the variant id as the first EsSub*
 209     EsVec = 0x12, // encodes the # of elements as the first EsSub*
 210     EsVecElt = 0x13,
 211     EsMap = 0x14, // encodes the # of pairs as the first EsSub*
 212     EsMapKey = 0x15,
 213     EsMapVal = 0x16,
 214     EsOpaque = 0x17,
 215 }
 216
 217 const NUM_TAGS: usize = 0x1000;
 218 const NUM_IMPLICIT_TAGS: usize = 0x0e;
 219
 220 #[cfg_attr(rustfmt, rustfmt_skip)]
 221 static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [
 222     1, 2, 4, 8, // EsU*
 223     1, 2, 4, 8, // ESI*
 224     1, // EsBool
 225     4, // EsChar
 226     4, 8, // EsF*
 227     1, 4, // EsSub*
 228 ];
 229
 230 #[derive(Debug)]
 231 pub enum Error {
 232     IntTooBig(usize),
 233     InvalidTag(usize),
 234     Expected(String),
 235     IoError(std::io::Error),
 236     ApplicationError(String),
 237 }
 238
 239 impl fmt::Display for Error {
 240     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 241         // FIXME: this should be a more useful display form
 242         fmt::Debug::fmt(self, f)
 243     }
 244 }
 245 // --------------------------------------
 246
 247 pub mod reader {
 248     use std::char;
 249
 250     use std::isize;
 251     use std::mem::transmute;
 252
 253     use serialize;
 254
 255     use super::opaque;
 256     use super::{ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey,
 257                 EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32,
 258                 EsChar, EsStr, EsMapVal, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error,
 259                 IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN};
 260
 261     pub type DecodeResult<T> = Result<T, Error>;
 262     // rbml reading
 263
 264     macro_rules! try_or {
 265         ($e:expr, $r:expr) => (
 266             match $e {
 267                 Ok(e) => e,
 268                 Err(e) => {
 269                     debug!("ignored error: {:?}", e);
 270                     return $r
 271                 }
 272             }
 273         )
 274     }
 275
 276     #[derive(Copy, Clone)]
 277     pub struct Res {
 278         pub val: usize,
 279         pub next: usize,
 280     }
 281
 282     pub fn tag_at(data: &[u8], start: usize) -> DecodeResult<Res> {
 283         let v = data[start] as usize;
 284         if v < 0xf0 {
 285             Ok(Res {
 286                 val: v,
 287                 next: start + 1,
 288             })
 289         } else if v > 0xf0 {
 290             Ok(Res {
 291                 val: ((v & 0xf) << 8) | data[start + 1] as usize,
 292                 next: start + 2,
 293             })
 294         } else {
 295             // every tag starting with byte 0xf0 is an overlong form, which is prohibited.
 296             Err(InvalidTag(v))
 297         }
 298     }
 299
 300     #[inline(never)]
 301     fn vuint_at_slow(data: &[u8], start: usize) -> DecodeResult<Res> {
 302         let a = data[start];
 303         if a & 0x80 != 0 {
 304             return Ok(Res {
 305                 val: (a & 0x7f) as usize,
 306                 next: start + 1,
 307             });
 308         }
 309         if a & 0x40 != 0 {
 310             return Ok(Res {
 311                 val: ((a & 0x3f) as usize) << 8 | (data[start + 1] as usize),
 312                 next: start + 2,
 313             });
 314         }
 315         if a & 0x20 != 0 {
 316             return Ok(Res {
 317                 val: ((a & 0x1f) as usize) << 16 | (data[start + 1] as usize) << 8 |
 318                      (data[start + 2] as usize),
 319                 next: start + 3,
 320             });
 321         }
 322         if a & 0x10 != 0 {
 323             return Ok(Res {
 324                 val: ((a & 0x0f) as usize) << 24 | (data[start + 1] as usize) << 16 |
 325                      (data[start + 2] as usize) << 8 |
 326                      (data[start + 3] as usize),
 327                 next: start + 4,
 328             });
 329         }
 330         Err(IntTooBig(a as usize))
 331     }
 332
 333     pub fn vuint_at(data: &[u8], start: usize) -> DecodeResult<Res> {
 334         if data.len() - start < 4 {
 335             return vuint_at_slow(data, start);
 336         }
 337
 338         // Lookup table for parsing EBML Element IDs as per
 339         // http://ebml.sourceforge.net/specs/ The Element IDs are parsed by
 340         // reading a big endian u32 positioned at data[start].  Using the four
 341         // most significant bits of the u32 we lookup in the table below how
 342         // the element ID should be derived from it.
 343         //
 344         // The table stores tuples (shift, mask) where shift is the number the
 345         // u32 should be right shifted with and mask is the value the right
 346         // shifted value should be masked with.  If for example the most
 347         // significant bit is set this means it's a class A ID and the u32
 348         // should be right shifted with 24 and masked with 0x7f. Therefore we
 349         // store (24, 0x7f) at index 0x8 - 0xF (four bit numbers where the most
 350         // significant bit is set).
 351         //
 352         // By storing the number of shifts and masks in a table instead of
 353         // checking in order if the most significant bit is set, the second
 354         // most significant bit is set etc. we can replace up to three
 355         // "and+branch" with a single table lookup which gives us a measured
 356         // speedup of around 2x on x86_64.
 357         static SHIFT_MASK_TABLE: [(usize, u32); 16] = [(0, 0x0),
 358                                                        (0, 0x0fffffff),
 359                                                        (8, 0x1fffff),
 360                                                        (8, 0x1fffff),
 361                                                        (16, 0x3fff),
 362                                                        (16, 0x3fff),
 363                                                        (16, 0x3fff),
 364                                                        (16, 0x3fff),
 365                                                        (24, 0x7f),
 366                                                        (24, 0x7f),
 367                                                        (24, 0x7f),
 368                                                        (24, 0x7f),
 369                                                        (24, 0x7f),
 370                                                        (24, 0x7f),
 371                                                        (24, 0x7f),
 372                                                        (24, 0x7f)];
 373
 374         unsafe {
 375             let ptr = data.as_ptr().offset(start as isize) as *const u32;
 376             let val = u32::from_be(*ptr);
 377
 378             let i = (val >> 28) as usize;
 379             let (shift, mask) = SHIFT_MASK_TABLE[i];
 380             Ok(Res {
 381                 val: ((val >> shift) & mask) as usize,
 382                 next: start + ((32 - shift) >> 3),
 383             })
 384         }
 385     }
 386
 387     pub fn tag_len_at(data: &[u8], tag: Res) -> DecodeResult<Res> {
 388         if tag.val < NUM_IMPLICIT_TAGS && TAG_IMPLICIT_LEN[tag.val] >= 0 {
 389             Ok(Res {
 390                 val: TAG_IMPLICIT_LEN[tag.val] as usize,
 391                 next: tag.next,
 392             })
 393         } else {
 394             vuint_at(data, tag.next)
 395         }
 396     }
 397
 398     pub fn doc_at<'a>(data: &'a [u8], start: usize) -> DecodeResult<TaggedDoc<'a>> {
 399         let elt_tag = try!(tag_at(data, start));
 400         let elt_size = try!(tag_len_at(data, elt_tag));
 401         let end = elt_size.next + elt_size.val;
 402         Ok(TaggedDoc {
 403             tag: elt_tag.val,
 404             doc: Doc {
 405                 data: data,
 406                 start: elt_size.next,
 407                 end: end,
 408             },
 409         })
 410     }
 411
 412     pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: usize) -> Option<Doc<'a>> {
 413         let mut pos = d.start;
 414         while pos < d.end {
 415             let elt_tag = try_or!(tag_at(d.data, pos), None);
 416             let elt_size = try_or!(tag_len_at(d.data, elt_tag), None);
 417             pos = elt_size.next + elt_size.val;
 418             if elt_tag.val == tg {
 419                 return Some(Doc {
 420                     data: d.data,
 421                     start: elt_size.next,
 422                     end: pos,
 423                 });
 424             }
 425         }
 426         None
 427     }
 428
 429     pub fn get_doc<'a>(d: Doc<'a>, tg: usize) -> Doc<'a> {
 430         match maybe_get_doc(d, tg) {
 431             Some(d) => d,
 432             None => {
 433                 error!("failed to find block with tag {:?}", tg);
 434                 panic!();
 435             }
 436         }
 437     }
 438
 439     pub fn docs<'a>(d: Doc<'a>) -> DocsIterator<'a> {
 440         DocsIterator { d: d }
 441     }
 442
 443     pub struct DocsIterator<'a> {
 444         d: Doc<'a>,
 445     }
 446
 447     impl<'a> Iterator for DocsIterator<'a> {
 448         type Item = (usize, Doc<'a>);
 449
 450         fn next(&mut self) -> Option<(usize, Doc<'a>)> {
 451             if self.d.start >= self.d.end {
 452                 return None;
 453             }
 454
 455             let elt_tag = try_or!(tag_at(self.d.data, self.d.start), {
 456                 self.d.start = self.d.end;
 457                 None
 458             });
 459             let elt_size = try_or!(tag_len_at(self.d.data, elt_tag), {
 460                 self.d.start = self.d.end;
 461                 None
 462             });
 463
 464             let end = elt_size.next + elt_size.val;
 465             let doc = Doc {
 466                 data: self.d.data,
 467                 start: elt_size.next,
 468                 end: end,
 469             };
 470
 471             self.d.start = end;
 472             return Some((elt_tag.val, doc));
 473         }
 474     }
 475
 476     pub fn tagged_docs<'a>(d: Doc<'a>, tag: usize) -> TaggedDocsIterator<'a> {
 477         TaggedDocsIterator {
 478             iter: docs(d),
 479             tag: tag,
 480         }
 481     }
 482
 483     pub struct TaggedDocsIterator<'a> {
 484         iter: DocsIterator<'a>,
 485         tag: usize,
 486     }
 487
 488     impl<'a> Iterator for TaggedDocsIterator<'a> {
 489         type Item = Doc<'a>;
 490
 491         fn next(&mut self) -> Option<Doc<'a>> {
 492             while let Some((tag, doc)) = self.iter.next() {
 493                 if tag == self.tag {
 494                     return Some(doc);
 495                 }
 496             }
 497             None
 498         }
 499     }
 500
 501     pub fn with_doc_data<T, F>(d: Doc, f: F) -> T
 502         where F: FnOnce(&[u8]) -> T
 503     {
 504         f(&d.data[d.start..d.end])
 505     }
 506
 507     pub fn doc_as_u8(d: Doc) -> u8 {
 508         assert_eq!(d.end, d.start + 1);
 509         d.data[d.start]
 510     }
 511
 512     pub fn doc_as_u64(d: Doc) -> u64 {
 513         if d.end >= 8 {
 514             // For performance, we read 8 big-endian bytes,
 515             // and mask off the junk if there is any. This
 516             // obviously won't work on the first 8 bytes
 517             // of a file - we will fall of the start
 518             // of the page and segfault.
 519
 520             let mut b = [0; 8];
 521             b.clone_from_slice(&d.data[d.end - 8..d.end]);
 522             let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() };
 523             let len = d.end - d.start;
 524             if len < 8 {
 525                 data & ((1 << (len * 8)) - 1)
 526             } else {
 527                 data
 528             }
 529         } else {
 530             let mut result = 0;
 531             for b in &d.data[d.start..d.end] {
 532                 result = (result << 8) + (*b as u64);
 533             }
 534             result
 535         }
 536     }
 537
 538     #[inline]
 539     pub fn doc_as_u16(d: Doc) -> u16 {
 540         doc_as_u64(d) as u16
 541     }
 542     #[inline]
 543     pub fn doc_as_u32(d: Doc) -> u32 {
 544         doc_as_u64(d) as u32
 545     }
 546
 547     #[inline]
 548     pub fn doc_as_i8(d: Doc) -> i8 {
 549         doc_as_u8(d) as i8
 550     }
 551     #[inline]
 552     pub fn doc_as_i16(d: Doc) -> i16 {
 553         doc_as_u16(d) as i16
 554     }
 555     #[inline]
 556     pub fn doc_as_i32(d: Doc) -> i32 {
 557         doc_as_u32(d) as i32
 558     }
 559     #[inline]
 560     pub fn doc_as_i64(d: Doc) -> i64 {
 561         doc_as_u64(d) as i64
 562     }
 563
 564     pub struct Decoder<'a> {
 565         parent: Doc<'a>,
 566         pos: usize,
 567     }
 568
 569     impl<'doc> Decoder<'doc> {
 570         pub fn new(d: Doc<'doc>) -> Decoder<'doc> {
 571             Decoder {
 572                 parent: d,
 573                 pos: d.start,
 574             }
 575         }
 576
 577         fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<Doc<'doc>> {
 578             debug!(". next_doc(exp_tag={:?})", exp_tag);
 579             if self.pos >= self.parent.end {
 580                 return Err(Expected(format!("no more documents in current node!")));
 581             }
 582             let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
 583             debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}",
 584                    self.parent.start,
 585                    self.parent.end,
 586                    self.pos,
 587                    r_tag,
 588                    r_doc.start,
 589                    r_doc.end);
 590             if r_tag != (exp_tag as usize) {
 591                 return Err(Expected(format!("expected EBML doc with tag {:?} but found tag {:?}",
 592                                             exp_tag,
 593                                             r_tag)));
 594             }
 595             if r_doc.end > self.parent.end {
 596                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 597                                              {:#x}",
 598                                             r_doc.end,
 599                                             self.parent.end)));
 600             }
 601             self.pos = r_doc.end;
 602             Ok(r_doc)
 603         }
 604
 605         fn push_doc<T, F>(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult<T>
 606             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 607         {
 608             let d = try!(self.next_doc(exp_tag));
 609             let old_parent = self.parent;
 610             let old_pos = self.pos;
 611             self.parent = d;
 612             self.pos = d.start;
 613             let r = try!(f(self));
 614             self.parent = old_parent;
 615             self.pos = old_pos;
 616             Ok(r)
 617         }
 618
 619         fn _next_sub(&mut self) -> DecodeResult<usize> {
 620             // empty vector/map optimization
 621             if self.parent.is_empty() {
 622                 return Ok(0);
 623             }
 624
 625             let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
 626             let r = if r_tag == (EsSub8 as usize) {
 627                 doc_as_u8(r_doc) as usize
 628             } else if r_tag == (EsSub32 as usize) {
 629                 doc_as_u32(r_doc) as usize
 630             } else {
 631                 return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but found \
 632                                              tag {:?}",
 633                                             EsSub8,
 634                                             EsSub32,
 635                                             r_tag)));
 636             };
 637             if r_doc.end > self.parent.end {
 638                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 639                                              {:#x}",
 640                                             r_doc.end,
 641                                             self.parent.end)));
 642             }
 643             self.pos = r_doc.end;
 644             debug!("_next_sub result={:?}", r);
 645             Ok(r)
 646         }
 647
 648         // variable-length unsigned integer with different tags.
 649         // `first_tag` should be a tag for u8 or i8.
 650         // `last_tag` should be the largest allowed integer tag with the matching signedness.
 651         // all tags between them should be valid, in the order of u8, u16, u32 and u64.
 652         fn _next_int(&mut self,
 653                      first_tag: EbmlEncoderTag,
 654                      last_tag: EbmlEncoderTag)
 655                      -> DecodeResult<u64> {
 656             if self.pos >= self.parent.end {
 657                 return Err(Expected(format!("no more documents in current node!")));
 658             }
 659
 660             let TaggedDoc { tag: r_tag, doc: r_doc } = try!(doc_at(self.parent.data, self.pos));
 661             let r = if first_tag as usize <= r_tag && r_tag <= last_tag as usize {
 662                 match r_tag - first_tag as usize {
 663                     0 => doc_as_u8(r_doc) as u64,
 664                     1 => doc_as_u16(r_doc) as u64,
 665                     2 => doc_as_u32(r_doc) as u64,
 666                     3 => doc_as_u64(r_doc),
 667                     _ => unreachable!(),
 668                 }
 669             } else {
 670                 return Err(Expected(format!("expected EBML doc with tag {:?} through {:?} but \
 671                                              found tag {:?}",
 672                                             first_tag,
 673                                             last_tag,
 674                                             r_tag)));
 675             };
 676             if r_doc.end > self.parent.end {
 677                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 678                                              {:#x}",
 679                                             r_doc.end,
 680                                             self.parent.end)));
 681             }
 682             self.pos = r_doc.end;
 683             debug!("_next_int({:?}, {:?}) result={:?}", first_tag, last_tag, r);
 684             Ok(r)
 685         }
 686
 687         pub fn read_opaque<R, F>(&mut self, op: F) -> DecodeResult<R>
 688             where F: FnOnce(&mut opaque::Decoder, Doc) -> DecodeResult<R>
 689         {
 690             let doc = try!(self.next_doc(EsOpaque));
 691
 692             let result = {
 693                 let mut opaque_decoder = opaque::Decoder::new(doc.data, doc.start);
 694                 try!(op(&mut opaque_decoder, doc))
 695             };
 696
 697             Ok(result)
 698         }
 699
 700         pub fn position(&self) -> usize {
 701             self.pos
 702         }
 703
 704         pub fn advance(&mut self, bytes: usize) {
 705             self.pos += bytes;
 706         }
 707     }
 708
 709     impl<'doc> serialize::Decoder for Decoder<'doc> {
 710         type Error = Error;
 711         fn read_nil(&mut self) -> DecodeResult<()> {
 712             Ok(())
 713         }
 714
 715         fn read_u64(&mut self) -> DecodeResult<u64> {
 716             self._next_int(EsU8, EsU64)
 717         }
 718         fn read_u32(&mut self) -> DecodeResult<u32> {
 719             Ok(try!(self._next_int(EsU8, EsU32)) as u32)
 720         }
 721         fn read_u16(&mut self) -> DecodeResult<u16> {
 722             Ok(try!(self._next_int(EsU8, EsU16)) as u16)
 723         }
 724         fn read_u8(&mut self) -> DecodeResult<u8> {
 725             Ok(doc_as_u8(try!(self.next_doc(EsU8))))
 726         }
 727         fn read_uint(&mut self) -> DecodeResult<usize> {
 728             let v = try!(self._next_int(EsU8, EsU64));
 729             if v > (::std::usize::MAX as u64) {
 730                 Err(IntTooBig(v as usize))
 731             } else {
 732                 Ok(v as usize)
 733             }
 734         }
 735
 736         fn read_i64(&mut self) -> DecodeResult<i64> {
 737             Ok(try!(self._next_int(EsI8, EsI64)) as i64)
 738         }
 739         fn read_i32(&mut self) -> DecodeResult<i32> {
 740             Ok(try!(self._next_int(EsI8, EsI32)) as i32)
 741         }
 742         fn read_i16(&mut self) -> DecodeResult<i16> {
 743             Ok(try!(self._next_int(EsI8, EsI16)) as i16)
 744         }
 745         fn read_i8(&mut self) -> DecodeResult<i8> {
 746             Ok(doc_as_u8(try!(self.next_doc(EsI8))) as i8)
 747         }
 748         fn read_int(&mut self) -> DecodeResult<isize> {
 749             let v = try!(self._next_int(EsI8, EsI64)) as i64;
 750             if v > (isize::MAX as i64) || v < (isize::MIN as i64) {
 751                 debug!("FIXME \\#6122: Removing this makes this function miscompile");
 752                 Err(IntTooBig(v as usize))
 753             } else {
 754                 Ok(v as isize)
 755             }
 756         }
 757
 758         fn read_bool(&mut self) -> DecodeResult<bool> {
 759             Ok(doc_as_u8(try!(self.next_doc(EsBool))) != 0)
 760         }
 761
 762         fn read_f64(&mut self) -> DecodeResult<f64> {
 763             let bits = doc_as_u64(try!(self.next_doc(EsF64)));
 764             Ok(unsafe { transmute(bits) })
 765         }
 766         fn read_f32(&mut self) -> DecodeResult<f32> {
 767             let bits = doc_as_u32(try!(self.next_doc(EsF32)));
 768             Ok(unsafe { transmute(bits) })
 769         }
 770         fn read_char(&mut self) -> DecodeResult<char> {
 771             Ok(char::from_u32(doc_as_u32(try!(self.next_doc(EsChar)))).unwrap())
 772         }
 773         fn read_str(&mut self) -> DecodeResult<String> {
 774             Ok(try!(self.next_doc(EsStr)).as_str())
 775         }
 776
 777         // Compound types:
 778         fn read_enum<T, F>(&mut self, name: &str, f: F) -> DecodeResult<T>
 779             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 780         {
 781             debug!("read_enum({})", name);
 782
 783             let doc = try!(self.next_doc(EsEnum));
 784
 785             let (old_parent, old_pos) = (self.parent, self.pos);
 786             self.parent = doc;
 787             self.pos = self.parent.start;
 788
 789             let result = try!(f(self));
 790
 791             self.parent = old_parent;
 792             self.pos = old_pos;
 793             Ok(result)
 794         }
 795
 796         fn read_enum_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
 797             where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 798         {
 799             debug!("read_enum_variant()");
 800             let idx = try!(self._next_sub());
 801             debug!("  idx={}", idx);
 802
 803             f(self, idx)
 804         }
 805
 806         fn read_enum_variant_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 807             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 808         {
 809             debug!("read_enum_variant_arg(idx={})", idx);
 810             f(self)
 811         }
 812
 813         fn read_enum_struct_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
 814             where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 815         {
 816             debug!("read_enum_struct_variant()");
 817             let idx = try!(self._next_sub());
 818             debug!("  idx={}", idx);
 819
 820             f(self, idx)
 821         }
 822
 823         fn read_enum_struct_variant_field<T, F>(&mut self,
 824                                                 name: &str,
 825                                                 idx: usize,
 826                                                 f: F)
 827                                                 -> DecodeResult<T>
 828             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 829         {
 830             debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx);
 831             f(self)
 832         }
 833
 834         fn read_struct<T, F>(&mut self, name: &str, _: usize, f: F) -> DecodeResult<T>
 835             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 836         {
 837             debug!("read_struct(name={})", name);
 838             f(self)
 839         }
 840
 841         fn read_struct_field<T, F>(&mut self, name: &str, idx: usize, f: F) -> DecodeResult<T>
 842             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 843         {
 844             debug!("read_struct_field(name={}, idx={})", name, idx);
 845             f(self)
 846         }
 847
 848         fn read_tuple<T, F>(&mut self, tuple_len: usize, f: F) -> DecodeResult<T>
 849             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 850         {
 851             debug!("read_tuple()");
 852             self.read_seq(move |d, len| {
 853                 if len == tuple_len {
 854                     f(d)
 855                 } else {
 856                     Err(Expected(format!("Expected tuple of length `{}`, found tuple of length \
 857                                           `{}`",
 858                                          tuple_len,
 859                                          len)))
 860                 }
 861             })
 862         }
 863
 864         fn read_tuple_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 865             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 866         {
 867             debug!("read_tuple_arg(idx={})", idx);
 868             self.read_seq_elt(idx, f)
 869         }
 870
 871         fn read_tuple_struct<T, F>(&mut self, name: &str, len: usize, f: F) -> DecodeResult<T>
 872             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 873         {
 874             debug!("read_tuple_struct(name={})", name);
 875             self.read_tuple(len, f)
 876         }
 877
 878         fn read_tuple_struct_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 879             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 880         {
 881             debug!("read_tuple_struct_arg(idx={})", idx);
 882             self.read_tuple_arg(idx, f)
 883         }
 884
 885         fn read_option<T, F>(&mut self, mut f: F) -> DecodeResult<T>
 886             where F: FnMut(&mut Decoder<'doc>, bool) -> DecodeResult<T>
 887         {
 888             debug!("read_option()");
 889             self.read_enum("Option", move |this| {
 890                 this.read_enum_variant(&["None", "Some"], move |this, idx| {
 891                     match idx {
 892                         0 => f(this, false),
 893                         1 => f(this, true),
 894                         _ => Err(Expected(format!("Expected None or Some"))),
 895                     }
 896                 })
 897             })
 898         }
 899
 900         fn read_seq<T, F>(&mut self, f: F) -> DecodeResult<T>
 901             where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 902         {
 903             debug!("read_seq()");
 904             self.push_doc(EsVec, move |d| {
 905                 let len = try!(d._next_sub());
 906                 debug!("  len={}", len);
 907                 f(d, len)
 908             })
 909         }
 910
 911         fn read_seq_elt<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 912             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 913         {
 914             debug!("read_seq_elt(idx={})", idx);
 915             self.push_doc(EsVecElt, f)
 916         }
 917
 918         fn read_map<T, F>(&mut self, f: F) -> DecodeResult<T>
 919             where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 920         {
 921             debug!("read_map()");
 922             self.push_doc(EsMap, move |d| {
 923                 let len = try!(d._next_sub());
 924                 debug!("  len={}", len);
 925                 f(d, len)
 926             })
 927         }
 928
 929         fn read_map_elt_key<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 930             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 931         {
 932             debug!("read_map_elt_key(idx={})", idx);
 933             self.push_doc(EsMapKey, f)
 934         }
 935
 936         fn read_map_elt_val<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 937             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 938         {
 939             debug!("read_map_elt_val(idx={})", idx);
 940             self.push_doc(EsMapVal, f)
 941         }
 942
 943         fn error(&mut self, err: &str) -> Error {
 944             ApplicationError(err.to_string())
 945         }
 946     }
 947 }
 948
 949 pub mod writer {
 950     use std::mem;
 951     use std::io::prelude::*;
 952     use std::io::{self, SeekFrom, Cursor};
 953
 954     use super::opaque;
 955     use super::{EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16,
 956                 EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
 957                 EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS};
 958
 959     use serialize;
 960
 961
 962     pub type EncodeResult = io::Result<()>;
 963
 964     // rbml writing
 965     pub struct Encoder<'a> {
 966         pub writer: &'a mut Cursor<Vec<u8>>,
 967         size_positions: Vec<u64>,
 968         relax_limit: u64, // do not move encoded bytes before this position
 969     }
 970
 971     fn write_tag<W: Write>(w: &mut W, n: usize) -> EncodeResult {
 972         if n < 0xf0 {
 973             w.write_all(&[n as u8])
 974         } else if 0x100 <= n && n < NUM_TAGS {
 975             w.write_all(&[0xf0 | (n >> 8) as u8, n as u8])
 976         } else {
 977             Err(io::Error::new(io::ErrorKind::Other, &format!("invalid tag: {}", n)[..]))
 978         }
 979     }
 980
 981     fn write_sized_vuint<W: Write>(w: &mut W, n: usize, size: usize) -> EncodeResult {
 982         match size {
 983             1 => w.write_all(&[0x80 | (n as u8)]),
 984             2 => w.write_all(&[0x40 | ((n >> 8) as u8), n as u8]),
 985             3 => w.write_all(&[0x20 | ((n >> 16) as u8), (n >> 8) as u8, n as u8]),
 986             4 => w.write_all(&[0x10 | ((n >> 24) as u8), (n >> 16) as u8, (n >> 8) as u8, n as u8]),
 987             _ => Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..])),
 988         }
 989     }
 990
 991     pub fn write_vuint<W: Write>(w: &mut W, n: usize) -> EncodeResult {
 992         if n < 0x7f {
 993             return write_sized_vuint(w, n, 1);
 994         }
 995         if n < 0x4000 {
 996             return write_sized_vuint(w, n, 2);
 997         }
 998         if n < 0x200000 {
 999             return write_sized_vuint(w, n, 3);
1000         }
1001         if n < 0x10000000 {
1002             return write_sized_vuint(w, n, 4);
1003         }
1004         Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..]))
1005     }
1006
1007     impl<'a> Encoder<'a> {
1008         pub fn new(w: &'a mut Cursor<Vec<u8>>) -> Encoder<'a> {
1009             Encoder {
1010                 writer: w,
1011                 size_positions: vec![],
1012                 relax_limit: 0,
1013             }
1014         }
1015
1016         pub fn start_tag(&mut self, tag_id: usize) -> EncodeResult {
1017             debug!("Start tag {:?}", tag_id);
1018             assert!(tag_id >= NUM_IMPLICIT_TAGS);
1019
1020             // Write the enum ID:
1021             try!(write_tag(self.writer, tag_id));
1022
1023             // Write a placeholder four-byte size.
1024             let cur_pos = try!(self.writer.seek(SeekFrom::Current(0)));
1025             self.size_positions.push(cur_pos);
1026             let zeroes: &[u8] = &[0, 0, 0, 0];
1027             self.writer.write_all(zeroes)
1028         }
1029
1030         pub fn end_tag(&mut self) -> EncodeResult {
1031             let last_size_pos = self.size_positions.pop().unwrap();
1032             let cur_pos = try!(self.writer.seek(SeekFrom::Current(0)));
1033             try!(self.writer.seek(SeekFrom::Start(last_size_pos)));
1034             let size = (cur_pos - last_size_pos - 4) as usize;
1035
1036             // relax the size encoding for small tags (bigger tags are costly to move).
1037             // we should never try to move the stable positions, however.
1038             const RELAX_MAX_SIZE: usize = 0x100;
1039             if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit {
1040                 // we can't alter the buffer in place, so have a temporary buffer
1041                 let mut buf = [0u8; RELAX_MAX_SIZE];
1042                 {
1043                     let last_size_pos = last_size_pos as usize;
1044                     let data = &self.writer.get_ref()[last_size_pos + 4..cur_pos as usize];
1045                     buf[..size].clone_from_slice(data);
1046                 }
1047
1048                 // overwrite the size and data and continue
1049                 try!(write_vuint(self.writer, size));
1050                 try!(self.writer.write_all(&buf[..size]));
1051             } else {
1052                 // overwrite the size with an overlong encoding and skip past the data
1053                 try!(write_sized_vuint(self.writer, size, 4));
1054                 try!(self.writer.seek(SeekFrom::Start(cur_pos)));
1055             }
1056
1057             debug!("End tag (size = {:?})", size);
1058             Ok(())
1059         }
1060
1061         pub fn wr_tag<F>(&mut self, tag_id: usize, blk: F) -> EncodeResult
1062             where F: FnOnce() -> EncodeResult
1063         {
1064             try!(self.start_tag(tag_id));
1065             try!(blk());
1066             self.end_tag()
1067         }
1068
1069         pub fn wr_tagged_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
1070             assert!(tag_id >= NUM_IMPLICIT_TAGS);
1071             try!(write_tag(self.writer, tag_id));
1072             try!(write_vuint(self.writer, b.len()));
1073             self.writer.write_all(b)
1074         }
1075
1076         pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
1077             let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
1078             // tagged integers are emitted in big-endian, with no
1079             // leading zeros.
1080             let leading_zero_bytes = v.leading_zeros() / 8;
1081             self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..])
1082         }
1083
1084         #[inline]
1085         pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1086             self.wr_tagged_u64(tag_id, v as u64)
1087         }
1088
1089         #[inline]
1090         pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1091             self.wr_tagged_u64(tag_id, v as u64)
1092         }
1093
1094         #[inline]
1095         pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
1096             self.wr_tagged_bytes(tag_id, &[v])
1097         }
1098
1099         #[inline]
1100         pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
1101             self.wr_tagged_u64(tag_id, v as u64)
1102         }
1103
1104         #[inline]
1105         pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
1106             self.wr_tagged_u32(tag_id, v as u32)
1107         }
1108
1109         #[inline]
1110         pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
1111             self.wr_tagged_u16(tag_id, v as u16)
1112         }
1113
1114         #[inline]
1115         pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
1116             self.wr_tagged_bytes(tag_id, &[v as u8])
1117         }
1118
1119         pub fn wr_tagged_str(&mut self, tag_id: usize, v: &str) -> EncodeResult {
1120             self.wr_tagged_bytes(tag_id, v.as_bytes())
1121         }
1122
1123         // for auto-serialization
1124         fn wr_tagged_raw_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
1125             try!(write_tag(self.writer, tag_id));
1126             self.writer.write_all(b)
1127         }
1128
1129         fn wr_tagged_raw_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
1130             let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
1131             self.wr_tagged_raw_bytes(tag_id, &bytes)
1132         }
1133
1134         fn wr_tagged_raw_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1135             let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) };
1136             self.wr_tagged_raw_bytes(tag_id, &bytes)
1137         }
1138
1139         fn wr_tagged_raw_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1140             let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) };
1141             self.wr_tagged_raw_bytes(tag_id, &bytes)
1142         }
1143
1144         fn wr_tagged_raw_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
1145             self.wr_tagged_raw_bytes(tag_id, &[v])
1146         }
1147
1148         fn wr_tagged_raw_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
1149             self.wr_tagged_raw_u64(tag_id, v as u64)
1150         }
1151
1152         fn wr_tagged_raw_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
1153             self.wr_tagged_raw_u32(tag_id, v as u32)
1154         }
1155
1156         fn wr_tagged_raw_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
1157             self.wr_tagged_raw_u16(tag_id, v as u16)
1158         }
1159
1160         fn wr_tagged_raw_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
1161             self.wr_tagged_raw_bytes(tag_id, &[v as u8])
1162         }
1163
1164         pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult {
1165             debug!("Write {:?} bytes", b.len());
1166             self.writer.write_all(b)
1167         }
1168
1169         pub fn wr_str(&mut self, s: &str) -> EncodeResult {
1170             debug!("Write str: {:?}", s);
1171             self.writer.write_all(s.as_bytes())
1172         }
1173
1174         /// Returns the current position while marking it stable, i.e.
1175         /// generated bytes so far wouldn't be affected by relaxation.
1176         pub fn mark_stable_position(&mut self) -> u64 {
1177             let pos = self.writer.seek(SeekFrom::Current(0)).unwrap();
1178             if self.relax_limit < pos {
1179                 self.relax_limit = pos;
1180             }
1181             pos
1182         }
1183     }
1184
1185     impl<'a> Encoder<'a> {
1186         // used internally to emit things like the vector length and so on
1187         fn _emit_tagged_sub(&mut self, v: usize) -> EncodeResult {
1188             if v as u8 as usize == v {
1189                 self.wr_tagged_raw_u8(EsSub8 as usize, v as u8)
1190             } else if v as u32 as usize == v {
1191                 self.wr_tagged_raw_u32(EsSub32 as usize, v as u32)
1192             } else {
1193                 Err(io::Error::new(io::ErrorKind::Other,
1194                                    &format!("length or variant id too big: {}", v)[..]))
1195             }
1196         }
1197
1198         pub fn emit_opaque<F>(&mut self, f: F) -> EncodeResult
1199             where F: FnOnce(&mut opaque::Encoder) -> EncodeResult
1200         {
1201             try!(self.start_tag(EsOpaque as usize));
1202
1203             {
1204                 let mut opaque_encoder = opaque::Encoder::new(self.writer);
1205                 try!(f(&mut opaque_encoder));
1206             }
1207
1208             self.mark_stable_position();
1209             self.end_tag()
1210         }
1211     }
1212
1213     impl<'a> serialize::Encoder for Encoder<'a> {
1214         type Error = io::Error;
1215
1216         fn emit_nil(&mut self) -> EncodeResult {
1217             Ok(())
1218         }
1219
1220         fn emit_uint(&mut self, v: usize) -> EncodeResult {
1221             self.emit_u64(v as u64)
1222         }
1223         fn emit_u64(&mut self, v: u64) -> EncodeResult {
1224             if v as u32 as u64 == v {
1225                 self.emit_u32(v as u32)
1226             } else {
1227                 self.wr_tagged_raw_u64(EsU64 as usize, v)
1228             }
1229         }
1230         fn emit_u32(&mut self, v: u32) -> EncodeResult {
1231             if v as u16 as u32 == v {
1232                 self.emit_u16(v as u16)
1233             } else {
1234                 self.wr_tagged_raw_u32(EsU32 as usize, v)
1235             }
1236         }
1237         fn emit_u16(&mut self, v: u16) -> EncodeResult {
1238             if v as u8 as u16 == v {
1239                 self.emit_u8(v as u8)
1240             } else {
1241                 self.wr_tagged_raw_u16(EsU16 as usize, v)
1242             }
1243         }
1244         fn emit_u8(&mut self, v: u8) -> EncodeResult {
1245             self.wr_tagged_raw_u8(EsU8 as usize, v)
1246         }
1247
1248         fn emit_int(&mut self, v: isize) -> EncodeResult {
1249             self.emit_i64(v as i64)
1250         }
1251         fn emit_i64(&mut self, v: i64) -> EncodeResult {
1252             if v as i32 as i64 == v {
1253                 self.emit_i32(v as i32)
1254             } else {
1255                 self.wr_tagged_raw_i64(EsI64 as usize, v)
1256             }
1257         }
1258         fn emit_i32(&mut self, v: i32) -> EncodeResult {
1259             if v as i16 as i32 == v {
1260                 self.emit_i16(v as i16)
1261             } else {
1262                 self.wr_tagged_raw_i32(EsI32 as usize, v)
1263             }
1264         }
1265         fn emit_i16(&mut self, v: i16) -> EncodeResult {
1266             if v as i8 as i16 == v {
1267                 self.emit_i8(v as i8)
1268             } else {
1269                 self.wr_tagged_raw_i16(EsI16 as usize, v)
1270             }
1271         }
1272         fn emit_i8(&mut self, v: i8) -> EncodeResult {
1273             self.wr_tagged_raw_i8(EsI8 as usize, v)
1274         }
1275
1276         fn emit_bool(&mut self, v: bool) -> EncodeResult {
1277             self.wr_tagged_raw_u8(EsBool as usize, v as u8)
1278         }
1279
1280         fn emit_f64(&mut self, v: f64) -> EncodeResult {
1281             let bits = unsafe { mem::transmute(v) };
1282             self.wr_tagged_raw_u64(EsF64 as usize, bits)
1283         }
1284         fn emit_f32(&mut self, v: f32) -> EncodeResult {
1285             let bits = unsafe { mem::transmute(v) };
1286             self.wr_tagged_raw_u32(EsF32 as usize, bits)
1287         }
1288         fn emit_char(&mut self, v: char) -> EncodeResult {
1289             self.wr_tagged_raw_u32(EsChar as usize, v as u32)
1290         }
1291
1292         fn emit_str(&mut self, v: &str) -> EncodeResult {
1293             self.wr_tagged_str(EsStr as usize, v)
1294         }
1295
1296         fn emit_enum<F>(&mut self, _name: &str, f: F) -> EncodeResult
1297             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1298         {
1299             try!(self.start_tag(EsEnum as usize));
1300             try!(f(self));
1301             self.end_tag()
1302         }
1303
1304         fn emit_enum_variant<F>(&mut self, _: &str, v_id: usize, _: usize, f: F) -> EncodeResult
1305             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1306         {
1307             try!(self._emit_tagged_sub(v_id));
1308             f(self)
1309         }
1310
1311         fn emit_enum_variant_arg<F>(&mut self, _: usize, f: F) -> EncodeResult
1312             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1313         {
1314             f(self)
1315         }
1316
1317         fn emit_enum_struct_variant<F>(&mut self,
1318                                        v_name: &str,
1319                                        v_id: usize,
1320                                        cnt: usize,
1321                                        f: F)
1322                                        -> EncodeResult
1323             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1324         {
1325             self.emit_enum_variant(v_name, v_id, cnt, f)
1326         }
1327
1328         fn emit_enum_struct_variant_field<F>(&mut self, _: &str, idx: usize, f: F) -> EncodeResult
1329             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1330         {
1331             self.emit_enum_variant_arg(idx, f)
1332         }
1333
1334         fn emit_struct<F>(&mut self, _: &str, _len: usize, f: F) -> EncodeResult
1335             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1336         {
1337             f(self)
1338         }
1339
1340         fn emit_struct_field<F>(&mut self, _name: &str, _: usize, f: F) -> EncodeResult
1341             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1342         {
1343             f(self)
1344         }
1345
1346         fn emit_tuple<F>(&mut self, len: usize, f: F) -> EncodeResult
1347             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1348         {
1349             self.emit_seq(len, f)
1350         }
1351         fn emit_tuple_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
1352             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1353         {
1354             self.emit_seq_elt(idx, f)
1355         }
1356
1357         fn emit_tuple_struct<F>(&mut self, _: &str, len: usize, f: F) -> EncodeResult
1358             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1359         {
1360             self.emit_seq(len, f)
1361         }
1362         fn emit_tuple_struct_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
1363             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1364         {
1365             self.emit_seq_elt(idx, f)
1366         }
1367
1368         fn emit_option<F>(&mut self, f: F) -> EncodeResult
1369             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1370         {
1371             self.emit_enum("Option", f)
1372         }
1373         fn emit_option_none(&mut self) -> EncodeResult {
1374             self.emit_enum_variant("None", 0, 0, |_| Ok(()))
1375         }
1376         fn emit_option_some<F>(&mut self, f: F) -> EncodeResult
1377             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1378         {
1379
1380             self.emit_enum_variant("Some", 1, 1, f)
1381         }
1382
1383         fn emit_seq<F>(&mut self, len: usize, f: F) -> EncodeResult
1384             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1385         {
1386             if len == 0 {
1387                 // empty vector optimization
1388                 return self.wr_tagged_bytes(EsVec as usize, &[]);
1389             }
1390
1391             try!(self.start_tag(EsVec as usize));
1392             try!(self._emit_tagged_sub(len));
1393             try!(f(self));
1394             self.end_tag()
1395         }
1396
1397         fn emit_seq_elt<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1398             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1399         {
1400
1401             try!(self.start_tag(EsVecElt as usize));
1402             try!(f(self));
1403             self.end_tag()
1404         }
1405
1406         fn emit_map<F>(&mut self, len: usize, f: F) -> EncodeResult
1407             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1408         {
1409             if len == 0 {
1410                 // empty map optimization
1411                 return self.wr_tagged_bytes(EsMap as usize, &[]);
1412             }
1413
1414             try!(self.start_tag(EsMap as usize));
1415             try!(self._emit_tagged_sub(len));
1416             try!(f(self));
1417             self.end_tag()
1418         }
1419
1420         fn emit_map_elt_key<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1421             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1422         {
1423
1424             try!(self.start_tag(EsMapKey as usize));
1425             try!(f(self));
1426             self.end_tag()
1427         }
1428
1429         fn emit_map_elt_val<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1430             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1431         {
1432             try!(self.start_tag(EsMapVal as usize));
1433             try!(f(self));
1434             self.end_tag()
1435         }
1436     }
1437 }
1438
1439 // ___________________________________________________________________________
1440 // Testing
1441
1442 #[cfg(test)]
1443 mod tests {
1444     use super::{Doc, reader, writer};
1445
1446     use serialize::{Encodable, Decodable};
1447
1448     use std::io::Cursor;
1449
1450     #[test]
1451     fn test_vuint_at() {
1452         let data = &[
1453             0x80,
1454             0xff,
1455             0x40, 0x00,
1456             0x7f, 0xff,
1457             0x20, 0x00, 0x00,
1458             0x3f, 0xff, 0xff,
1459             0x10, 0x00, 0x00, 0x00,
1460             0x1f, 0xff, 0xff, 0xff
1461         ];
1462
1463         let mut res: reader::Res;
1464
1465         // Class A
1466         res = reader::vuint_at(data, 0).unwrap();
1467         assert_eq!(res.val, 0);
1468         assert_eq!(res.next, 1);
1469         res = reader::vuint_at(data, res.next).unwrap();
1470         assert_eq!(res.val, (1 << 7) - 1);
1471         assert_eq!(res.next, 2);
1472
1473         // Class B
1474         res = reader::vuint_at(data, res.next).unwrap();
1475         assert_eq!(res.val, 0);
1476         assert_eq!(res.next, 4);
1477         res = reader::vuint_at(data, res.next).unwrap();
1478         assert_eq!(res.val, (1 << 14) - 1);
1479         assert_eq!(res.next, 6);
1480
1481         // Class C
1482         res = reader::vuint_at(data, res.next).unwrap();
1483         assert_eq!(res.val, 0);
1484         assert_eq!(res.next, 9);
1485         res = reader::vuint_at(data, res.next).unwrap();
1486         assert_eq!(res.val, (1 << 21) - 1);
1487         assert_eq!(res.next, 12);
1488
1489         // Class D
1490         res = reader::vuint_at(data, res.next).unwrap();
1491         assert_eq!(res.val, 0);
1492         assert_eq!(res.next, 16);
1493         res = reader::vuint_at(data, res.next).unwrap();
1494         assert_eq!(res.val, (1 << 28) - 1);
1495         assert_eq!(res.next, 20);
1496     }
1497
1498     #[test]
1499     fn test_option_int() {
1500         fn test_v(v: Option<isize>) {
1501             debug!("v == {:?}", v);
1502             let mut wr = Cursor::new(Vec::new());
1503             {
1504                 let mut rbml_w = writer::Encoder::new(&mut wr);
1505                 let _ = v.encode(&mut rbml_w);
1506             }
1507             let rbml_doc = Doc::new(wr.get_ref());
1508             let mut deser = reader::Decoder::new(rbml_doc);
1509             let v1 = Decodable::decode(&mut deser).unwrap();
1510             debug!("v1 == {:?}", v1);
1511             assert_eq!(v, v1);
1512         }
1513
1514         test_v(Some(22));
1515         test_v(None);
1516         test_v(Some(3));
1517     }
1518 }
1519
1520 #[cfg(test)]
1521 mod bench {
1522     #![allow(non_snake_case)]
1523     use test::Bencher;
1524     use super::reader;
1525
1526     #[bench]
1527     pub fn vuint_at_A_aligned(b: &mut Bencher) {
1528         let data = (0..4 * 100)
1529                        .map(|i| {
1530                            match i % 2 {
1531                                0 => 0x80,
1532                                _ => i as u8,
1533                            }
1534                        })
1535                        .collect::<Vec<_>>();
1536         let mut sum = 0;
1537         b.iter(|| {
1538             let mut i = 0;
1539             while i < data.len() {
1540                 sum += reader::vuint_at(&data, i).unwrap().val;
1541                 i += 4;
1542             }
1543         });
1544     }
1545
1546     #[bench]
1547     pub fn vuint_at_A_unaligned(b: &mut Bencher) {
1548         let data = (0..4 * 100 + 1)
1549                        .map(|i| {
1550                            match i % 2 {
1551                                1 => 0x80,
1552                                _ => i as u8,
1553                            }
1554                        })
1555                        .collect::<Vec<_>>();
1556         let mut sum = 0;
1557         b.iter(|| {
1558             let mut i = 1;
1559             while i < data.len() {
1560                 sum += reader::vuint_at(&data, i).unwrap().val;
1561                 i += 4;
1562             }
1563         });
1564     }
1565
1566     #[bench]
1567     pub fn vuint_at_D_aligned(b: &mut Bencher) {
1568         let data = (0..4 * 100)
1569                        .map(|i| {
1570                            match i % 4 {
1571                                0 => 0x10,
1572                                3 => i as u8,
1573                                _ => 0,
1574                            }
1575                        })
1576                        .collect::<Vec<_>>();
1577         let mut sum = 0;
1578         b.iter(|| {
1579             let mut i = 0;
1580             while i < data.len() {
1581                 sum += reader::vuint_at(&data, i).unwrap().val;
1582                 i += 4;
1583             }
1584         });
1585     }
1586
1587     #[bench]
1588     pub fn vuint_at_D_unaligned(b: &mut Bencher) {
1589         let data = (0..4 * 100 + 1)
1590                        .map(|i| {
1591                            match i % 4 {
1592                                1 => 0x10,
1593                                0 => i as u8,
1594                                _ => 0,
1595                            }
1596                        })
1597                        .collect::<Vec<_>>();
1598         let mut sum = 0;
1599         b.iter(|| {
1600             let mut i = 1;
1601             while i < data.len() {
1602                 sum += reader::vuint_at(&data, i).unwrap().val;
1603                 i += 4;
1604             }
1605         });
1606     }
1607 }