src/librbml/lib.rs

   1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! Really Bad Markup Language (rbml) is an internal serialization format of rustc.
  12 //! This is not intended to be used by users.
  13 //!
  14 //! Originally based on the Extensible Binary Markup Language
  15 //! (ebml; http://www.matroska.org/technical/specs/rfc/index.html),
  16 //! it is now a separate format tuned for the rust object metadata.
  17 //!
  18 //! # Encoding
  19 //!
  20 //! RBML document consists of the tag, length and data.
  21 //! The encoded data can contain multiple RBML documents concatenated.
  22 //!
  23 //! **Tags** are a hint for the following data.
//! Tags are numbers from 0x000 to 0xfff, where 0xf0 through 0xff are reserved.
  25 //! Tags less than 0xf0 are encoded in one literal byte.
  26 //! Tags greater than 0xff are encoded in two big-endian bytes,
  27 //! where the tag number is ORed with 0xf000. (E.g. tag 0x123 = `f1 23`)
  28 //!
  29 //! **Lengths** encode the length of the following data.
//! It is a variable-length unsigned integer, and one of the following forms:
  31 //!
  32 //! - `80` through `fe` for lengths up to 0x7e;
  33 //! - `40 ff` through `7f ff` for lengths up to 0x3fff;
  34 //! - `20 40 00` through `3f ff ff` for lengths up to 0x1fffff;
  35 //! - `10 20 00 00` through `1f ff ff ff` for lengths up to 0xfffffff.
  36 //!
  37 //! The "overlong" form is allowed so that the length can be encoded
  38 //! without the prior knowledge of the encoded data.
  39 //! For example, the length 0 can be represented either by `80`, `40 00`,
  40 //! `20 00 00` or `10 00 00 00`.
  41 //! The encoder tries to minimize the length if possible.
  42 //! Also, some predefined tags listed below are so commonly used that
  43 //! their lengths are omitted ("implicit length").
  44 //!
  45 //! **Data** can be either binary bytes or zero or more nested RBML documents.
  46 //! Nested documents cannot overflow, and should be entirely contained
  47 //! within a parent document.
  48 //!
  49 //! # Predefined Tags
  50 //!
  51 //! Most RBML tags are defined by the application.
  52 //! (For the rust object metadata, see also `rustc::metadata::common`.)
  53 //! RBML itself does define a set of predefined tags however,
  54 //! intended for the auto-serialization implementation.
  55 //!
  56 //! Predefined tags with an implicit length:
  57 //!
  58 //! - `U8`  (`00`): 1-byte unsigned integer.
  59 //! - `U16` (`01`): 2-byte big endian unsigned integer.
  60 //! - `U32` (`02`): 4-byte big endian unsigned integer.
  61 //! - `U64` (`03`): 8-byte big endian unsigned integer.
  62 //!   Any of `U*` tags can be used to encode primitive unsigned integer types,
  63 //!   as long as it is no greater than the actual size.
  64 //!   For example, `u8` can only be represented via the `U8` tag.
  65 //!
  66 //! - `I8`  (`04`): 1-byte signed integer.
  67 //! - `I16` (`05`): 2-byte big endian signed integer.
  68 //! - `I32` (`06`): 4-byte big endian signed integer.
  69 //! - `I64` (`07`): 8-byte big endian signed integer.
  70 //!   Similar to `U*` tags. Always uses two's complement encoding.
  71 //!
  72 //! - `Bool` (`08`): 1-byte boolean value, `00` for false and `01` for true.
  73 //!
  74 //! - `Char` (`09`): 4-byte big endian Unicode scalar value.
  75 //!   Surrogate pairs or out-of-bound values are invalid.
  76 //!
  77 //! - `F32` (`0a`): 4-byte big endian unsigned integer representing
  78 //!   IEEE 754 binary32 floating-point format.
  79 //! - `F64` (`0b`): 8-byte big endian unsigned integer representing
  80 //!   IEEE 754 binary64 floating-point format.
  81 //!
  82 //! - `Sub8`  (`0c`): 1-byte unsigned integer for supplementary information.
  83 //! - `Sub32` (`0d`): 4-byte unsigned integer for supplementary information.
  84 //!   Those two tags normally occur as the first subdocument of certain tags,
  85 //!   namely `Enum`, `Vec` and `Map`, to provide a variant or size information.
  86 //!   They can be used interchangeably.
  87 //!
  88 //! Predefined tags with an explicit length:
  89 //!
  90 //! - `Str` (`10`): A UTF-8-encoded string.
  91 //!
  92 //! - `Enum` (`11`): An enum.
  93 //!   The first subdocument should be `Sub*` tags with a variant ID.
  94 //!   Subsequent subdocuments, if any, encode variant arguments.
  95 //!
  96 //! - `Vec` (`12`): A vector (sequence).
  97 //! - `VecElt` (`13`): A vector element.
  98 //!   The first subdocument should be `Sub*` tags with the number of elements.
  99 //!   Subsequent subdocuments should be `VecElt` tag per each element.
 100 //!
//! - `Map` (`14`): A map (associative array).
 102 //! - `MapKey` (`15`): A key part of the map entry.
 103 //! - `MapVal` (`16`): A value part of the map entry.
 104 //!   The first subdocument should be `Sub*` tags with the number of entries.
 105 //!   Subsequent subdocuments should be an alternating sequence of
 106 //!   `MapKey` and `MapVal` tags per each entry.
 107 //!
 108 //! - `Opaque` (`17`): An opaque, custom-format tag.
 109 //!   Used to wrap ordinary custom tags or data in the auto-serialized context.
 110 //!   Rustc typically uses this to encode type information.
 111 //!
 112 //! First 0x20 tags are reserved by RBML; custom tags start at 0x20.
 113
 114 #![crate_name = "rbml"]
 115 #![unstable(feature = "rustc_private", issue = "27812")]
 116 #![crate_type = "rlib"]
 117 #![crate_type = "dylib"]
 118 #![doc(html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
 119        html_favicon_url = "https://doc.rust-lang.org/favicon.ico",
 120        html_root_url = "https://doc.rust-lang.org/nightly/",
 121        html_playground_url = "https://play.rust-lang.org/",
 122        test(attr(deny(warnings))))]
 123 #![cfg_attr(not(stage0), deny(warnings))]
 124
 125 #![feature(rustc_private)]
 126 #![feature(staged_api)]
 127 #![feature(question_mark)]
 128
 129 #![cfg_attr(test, feature(test))]
 130
 131 extern crate serialize;
 132
 133 #[cfg(test)]
 134 extern crate serialize as rustc_serialize; // Used by RustcEncodable
 135
 136 #[macro_use]
 137 extern crate log;
 138
 139 #[cfg(test)]
 140 extern crate test;
 141
 142 pub mod opaque;
 143 pub mod leb128;
 144
 145 pub use self::EbmlEncoderTag::*;
 146 pub use self::Error::*;
 147
 148 use std::str;
 149 use std::fmt;
 150
/// A lightweight, copyable view of one RBML document: the byte range
/// `start..end` inside the shared buffer `data`.
#[derive(Clone, Copy)]
pub struct Doc<'a> {
    /// The whole buffer the document lives in (not just the document's bytes).
    pub data: &'a [u8],
    /// Offset into `data` of the document's first byte.
    pub start: usize,
    /// Offset into `data` one past the document's last byte.
    pub end: usize,
}
 158
 159 impl<'doc> Doc<'doc> {
 160     pub fn new(data: &'doc [u8]) -> Doc<'doc> {
 161         Doc {
 162             data: data,
 163             start: 0,
 164             end: data.len(),
 165         }
 166     }
 167
 168     pub fn get(&self, tag: usize) -> Doc<'doc> {
 169         reader::get_doc(*self, tag)
 170     }
 171
 172     pub fn is_empty(&self) -> bool {
 173         self.start == self.end
 174     }
 175
 176     pub fn as_str_slice(&self) -> &'doc str {
 177         str::from_utf8(&self.data[self.start..self.end]).unwrap()
 178     }
 179
 180     pub fn as_str(&self) -> String {
 181         self.as_str_slice().to_string()
 182     }
 183 }
 184
/// A document paired with the tag that was read in front of it.
pub struct TaggedDoc<'a> {
    // Numeric tag value as decoded by `reader::tag_at`.
    tag: usize,
    /// The document's payload range.
    pub doc: Doc<'a>,
}
 189
/// Tags predefined by RBML itself for the auto-serialization implementation.
///
/// Each variant's discriminant is the tag number that appears in the
/// encoded stream.
#[derive(Copy, Clone, Debug)]
pub enum EbmlEncoderTag {
    // tags 00..1f are reserved for auto-serialization.
    // first NUM_IMPLICIT_TAGS tags are implicitly sized and lengths are not encoded.
    EsU8 = 0x00, // + 1 byte
    EsU16 = 0x01, // + 2 bytes
    EsU32 = 0x02, // + 4 bytes
    EsU64 = 0x03, // + 8 bytes
    EsI8 = 0x04, // + 1 byte
    EsI16 = 0x05, // + 2 bytes
    EsI32 = 0x06, // + 4 bytes
    EsI64 = 0x07, // + 8 bytes
    EsBool = 0x08, // + 1 byte
    EsChar = 0x09, // + 4 bytes
    EsF32 = 0x0a, // + 4 bytes
    EsF64 = 0x0b, // + 8 bytes
    EsSub8 = 0x0c, // + 1 byte
    EsSub32 = 0x0d, // + 4 bytes
    // 0x0e and 0x0f are reserved
    // The tags below carry an explicit length after the tag.
    EsStr = 0x10,
    EsEnum = 0x11, // encodes the variant id as the first EsSub*
    EsVec = 0x12, // encodes the # of elements as the first EsSub*
    EsVecElt = 0x13,
    EsMap = 0x14, // encodes the # of pairs as the first EsSub*
    EsMapKey = 0x15,
    EsMapVal = 0x16,
    EsOpaque = 0x17,
}
 218
// Size of the tag space; the module documentation describes tags as
// numbers from 0x000 to 0xfff.
const NUM_TAGS: usize = 0x1000;
// Number of leading tags (0x00..0x0d) whose payload length is implied by the
// tag itself and therefore never written to the stream.
const NUM_IMPLICIT_TAGS: usize = 0x0e;

// Payload length in bytes for each implicitly sized tag, indexed by tag number.
#[cfg_attr(rustfmt, rustfmt_skip)]
static TAG_IMPLICIT_LEN: [i8; NUM_IMPLICIT_TAGS] = [
    1, 2, 4, 8, // EsU*
    1, 2, 4, 8, // EsI*
    1, // EsBool
    4, // EsChar
    4, 8, // EsF*
    1, 4, // EsSub*
];
 231
/// Errors reported while reading or writing RBML.
#[derive(Debug)]
pub enum Error {
    /// An integer could not be decoded or does not fit the requested type;
    /// carries the offending value (or lead byte).
    IntTooBig(usize),
    /// A tag used a prohibited encoding; carries the offending lead byte.
    InvalidTag(usize),
    /// The input did not contain what the decoder expected; carries a
    /// human-readable description.
    Expected(String),
    /// Wraps an underlying I/O error.
    IoError(std::io::Error),
    /// NOTE(review): presumably raised by code layered on top of RBML;
    /// not produced by the reader code visible here.
    ApplicationError(String),
}
 240
/// `Display` currently forwards to the derived `Debug` output.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // FIXME: this should be a more useful display form
        fmt::Debug::fmt(self, f)
    }
}
 247 // --------------------------------------
 248
 249 pub mod reader {
 250     use std::char;
 251
 252     use std::isize;
 253     use std::mem::transmute;
 254
 255     use serialize;
 256
 257     use super::opaque;
 258     use super::{ApplicationError, EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey,
 259                 EsU64, EsU32, EsU16, EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32,
 260                 EsChar, EsStr, EsMapVal, EsOpaque, EbmlEncoderTag, Doc, TaggedDoc, Error,
 261                 IntTooBig, InvalidTag, Expected, NUM_IMPLICIT_TAGS, TAG_IMPLICIT_LEN};
 262
 263     pub type DecodeResult<T> = Result<T, Error>;
 264     // rbml reading
 265
    // Evaluates `$e`; on `Ok` yields the inner value, on `Err` logs the error
    // at debug level and makes the *enclosing function* return `$r`.
    // Used by the lenient lookup/iteration helpers that report failure as
    // `None` instead of propagating a decode error.
    macro_rules! try_or {
        ($e:expr, $r:expr) => (
            match $e {
                Ok(e) => e,
                Err(e) => {
                    debug!("ignored error: {:?}", e);
                    return $r
                }
            }
        )
    }
 277
    /// Result of decoding one tag or length: the decoded value and the
    /// offset at which reading should continue.
    #[derive(Copy, Clone)]
    pub struct Res {
        /// The decoded number.
        pub val: usize,
        /// Offset of the first byte after the decoded item.
        pub next: usize,
    }
 283
 284     pub fn tag_at(data: &[u8], start: usize) -> DecodeResult<Res> {
 285         let v = data[start] as usize;
 286         if v < 0xf0 {
 287             Ok(Res {
 288                 val: v,
 289                 next: start + 1,
 290             })
 291         } else if v > 0xf0 {
 292             Ok(Res {
 293                 val: ((v & 0xf) << 8) | data[start + 1] as usize,
 294                 next: start + 2,
 295             })
 296         } else {
 297             // every tag starting with byte 0xf0 is an overlong form, which is prohibited.
 298             Err(InvalidTag(v))
 299         }
 300     }
 301
 302     #[inline(never)]
 303     fn vuint_at_slow(data: &[u8], start: usize) -> DecodeResult<Res> {
 304         let a = data[start];
 305         if a & 0x80 != 0 {
 306             return Ok(Res {
 307                 val: (a & 0x7f) as usize,
 308                 next: start + 1,
 309             });
 310         }
 311         if a & 0x40 != 0 {
 312             return Ok(Res {
 313                 val: ((a & 0x3f) as usize) << 8 | (data[start + 1] as usize),
 314                 next: start + 2,
 315             });
 316         }
 317         if a & 0x20 != 0 {
 318             return Ok(Res {
 319                 val: ((a & 0x1f) as usize) << 16 | (data[start + 1] as usize) << 8 |
 320                      (data[start + 2] as usize),
 321                 next: start + 3,
 322             });
 323         }
 324         if a & 0x10 != 0 {
 325             return Ok(Res {
 326                 val: ((a & 0x0f) as usize) << 24 | (data[start + 1] as usize) << 16 |
 327                      (data[start + 2] as usize) << 8 |
 328                      (data[start + 3] as usize),
 329                 next: start + 4,
 330             });
 331         }
 332         Err(IntTooBig(a as usize))
 333     }
 334
 335     pub fn vuint_at(data: &[u8], start: usize) -> DecodeResult<Res> {
 336         if data.len() - start < 4 {
 337             return vuint_at_slow(data, start);
 338         }
 339
 340         // Lookup table for parsing EBML Element IDs as per
 341         // http://ebml.sourceforge.net/specs/ The Element IDs are parsed by
 342         // reading a big endian u32 positioned at data[start].  Using the four
 343         // most significant bits of the u32 we lookup in the table below how
 344         // the element ID should be derived from it.
 345         //
 346         // The table stores tuples (shift, mask) where shift is the number the
 347         // u32 should be right shifted with and mask is the value the right
 348         // shifted value should be masked with.  If for example the most
 349         // significant bit is set this means it's a class A ID and the u32
 350         // should be right shifted with 24 and masked with 0x7f. Therefore we
 351         // store (24, 0x7f) at index 0x8 - 0xF (four bit numbers where the most
 352         // significant bit is set).
 353         //
 354         // By storing the number of shifts and masks in a table instead of
 355         // checking in order if the most significant bit is set, the second
 356         // most significant bit is set etc. we can replace up to three
 357         // "and+branch" with a single table lookup which gives us a measured
 358         // speedup of around 2x on x86_64.
 359         static SHIFT_MASK_TABLE: [(usize, u32); 16] = [(0, 0x0),
 360                                                        (0, 0x0fffffff),
 361                                                        (8, 0x1fffff),
 362                                                        (8, 0x1fffff),
 363                                                        (16, 0x3fff),
 364                                                        (16, 0x3fff),
 365                                                        (16, 0x3fff),
 366                                                        (16, 0x3fff),
 367                                                        (24, 0x7f),
 368                                                        (24, 0x7f),
 369                                                        (24, 0x7f),
 370                                                        (24, 0x7f),
 371                                                        (24, 0x7f),
 372                                                        (24, 0x7f),
 373                                                        (24, 0x7f),
 374                                                        (24, 0x7f)];
 375
 376         unsafe {
 377             let ptr = data.as_ptr().offset(start as isize) as *const u32;
 378             let val = u32::from_be(*ptr);
 379
 380             let i = (val >> 28) as usize;
 381             let (shift, mask) = SHIFT_MASK_TABLE[i];
 382             Ok(Res {
 383                 val: ((val >> shift) & mask) as usize,
 384                 next: start + ((32 - shift) >> 3),
 385             })
 386         }
 387     }
 388
 389     pub fn tag_len_at(data: &[u8], tag: Res) -> DecodeResult<Res> {
 390         if tag.val < NUM_IMPLICIT_TAGS && TAG_IMPLICIT_LEN[tag.val] >= 0 {
 391             Ok(Res {
 392                 val: TAG_IMPLICIT_LEN[tag.val] as usize,
 393                 next: tag.next,
 394             })
 395         } else {
 396             vuint_at(data, tag.next)
 397         }
 398     }
 399
 400     pub fn doc_at<'a>(data: &'a [u8], start: usize) -> DecodeResult<TaggedDoc<'a>> {
 401         let elt_tag = tag_at(data, start)?;
 402         let elt_size = tag_len_at(data, elt_tag)?;
 403         let end = elt_size.next + elt_size.val;
 404         Ok(TaggedDoc {
 405             tag: elt_tag.val,
 406             doc: Doc {
 407                 data: data,
 408                 start: elt_size.next,
 409                 end: end,
 410             },
 411         })
 412     }
 413
 414     pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: usize) -> Option<Doc<'a>> {
 415         let mut pos = d.start;
 416         while pos < d.end {
 417             let elt_tag = try_or!(tag_at(d.data, pos), None);
 418             let elt_size = try_or!(tag_len_at(d.data, elt_tag), None);
 419             pos = elt_size.next + elt_size.val;
 420             if elt_tag.val == tg {
 421                 return Some(Doc {
 422                     data: d.data,
 423                     start: elt_size.next,
 424                     end: pos,
 425                 });
 426             }
 427         }
 428         None
 429     }
 430
 431     pub fn get_doc<'a>(d: Doc<'a>, tg: usize) -> Doc<'a> {
 432         match maybe_get_doc(d, tg) {
 433             Some(d) => d,
 434             None => {
 435                 error!("failed to find block with tag {:?}", tg);
 436                 panic!();
 437             }
 438         }
 439     }
 440
    /// Returns an iterator over the direct children of `d`, yielding each
    /// child's tag together with its payload.
    pub fn docs<'a>(d: Doc<'a>) -> DocsIterator<'a> {
        DocsIterator { d: d }
    }
 444
    /// Iterator state for `docs`: the not-yet-visited remainder of the
    /// parent document. `d.start` advances past each child as it is yielded.
    pub struct DocsIterator<'a> {
        d: Doc<'a>,
    }
 448
 449     impl<'a> Iterator for DocsIterator<'a> {
 450         type Item = (usize, Doc<'a>);
 451
 452         fn next(&mut self) -> Option<(usize, Doc<'a>)> {
 453             if self.d.start >= self.d.end {
 454                 return None;
 455             }
 456
 457             let elt_tag = try_or!(tag_at(self.d.data, self.d.start), {
 458                 self.d.start = self.d.end;
 459                 None
 460             });
 461             let elt_size = try_or!(tag_len_at(self.d.data, elt_tag), {
 462                 self.d.start = self.d.end;
 463                 None
 464             });
 465
 466             let end = elt_size.next + elt_size.val;
 467             let doc = Doc {
 468                 data: self.d.data,
 469                 start: elt_size.next,
 470                 end: end,
 471             };
 472
 473             self.d.start = end;
 474             return Some((elt_tag.val, doc));
 475         }
 476     }
 477
    /// Returns an iterator over the direct children of `d` whose tag
    /// equals `tag`.
    pub fn tagged_docs<'a>(d: Doc<'a>, tag: usize) -> TaggedDocsIterator<'a> {
        TaggedDocsIterator {
            iter: docs(d),
            tag: tag,
        }
    }
 484
    /// Iterator state for `tagged_docs`: an iterator over all children plus
    /// the tag to keep.
    pub struct TaggedDocsIterator<'a> {
        iter: DocsIterator<'a>,
        tag: usize,
    }
 489
 490     impl<'a> Iterator for TaggedDocsIterator<'a> {
 491         type Item = Doc<'a>;
 492
 493         fn next(&mut self) -> Option<Doc<'a>> {
 494             while let Some((tag, doc)) = self.iter.next() {
 495                 if tag == self.tag {
 496                     return Some(doc);
 497                 }
 498             }
 499             None
 500         }
 501     }
 502
    /// Calls `f` with the document's bytes (`data[start..end]`) and returns
    /// whatever `f` returns.
    pub fn with_doc_data<T, F>(d: Doc, f: F) -> T
        where F: FnOnce(&[u8]) -> T
    {
        f(&d.data[d.start..d.end])
    }
 508
    /// Returns the single byte of a one-byte document.
    ///
    /// Panics if the document is not exactly one byte long.
    pub fn doc_as_u8(d: Doc) -> u8 {
        assert_eq!(d.end, d.start + 1);
        d.data[d.start]
    }
 513
 514     pub fn doc_as_u64(d: Doc) -> u64 {
 515         if d.end >= 8 {
 516             // For performance, we read 8 big-endian bytes,
 517             // and mask off the junk if there is any. This
 518             // obviously won't work on the first 8 bytes
 519             // of a file - we will fall of the start
 520             // of the page and segfault.
 521
 522             let mut b = [0; 8];
 523             b.copy_from_slice(&d.data[d.end - 8..d.end]);
 524             let data = unsafe { (*(b.as_ptr() as *const u64)).to_be() };
 525             let len = d.end - d.start;
 526             if len < 8 {
 527                 data & ((1 << (len * 8)) - 1)
 528             } else {
 529                 data
 530             }
 531         } else {
 532             let mut result = 0;
 533             for b in &d.data[d.start..d.end] {
 534                 result = (result << 8) + (*b as u64);
 535             }
 536             result
 537         }
 538     }
 539
    /// Decodes the document with `doc_as_u64` and keeps the low 16 bits.
    #[inline]
    pub fn doc_as_u16(d: Doc) -> u16 {
        doc_as_u64(d) as u16
    }
    /// Decodes the document with `doc_as_u64` and keeps the low 32 bits.
    #[inline]
    pub fn doc_as_u32(d: Doc) -> u32 {
        doc_as_u64(d) as u32
    }
 548
    /// Reads a one-byte document and reinterprets the byte as `i8`.
    #[inline]
    pub fn doc_as_i8(d: Doc) -> i8 {
        doc_as_u8(d) as i8
    }
    /// Decodes the document as `u16` and reinterprets the bits as `i16`.
    #[inline]
    pub fn doc_as_i16(d: Doc) -> i16 {
        doc_as_u16(d) as i16
    }
    /// Decodes the document as `u32` and reinterprets the bits as `i32`.
    #[inline]
    pub fn doc_as_i32(d: Doc) -> i32 {
        doc_as_u32(d) as i32
    }
    /// Decodes the document as `u64` and reinterprets the bits as `i64`.
    #[inline]
    pub fn doc_as_i64(d: Doc) -> i64 {
        doc_as_u64(d) as i64
    }
 565
    /// Sequential reader over the children of a document.
    pub struct Decoder<'a> {
        // The document whose children are currently being read.
        parent: Doc<'a>,
        // Offset into `parent.data` of the next child to read.
        pos: usize,
    }
 570
 571     impl<'doc> Decoder<'doc> {
 572         pub fn new(d: Doc<'doc>) -> Decoder<'doc> {
 573             Decoder {
 574                 parent: d,
 575                 pos: d.start,
 576             }
 577         }
 578
 579         fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<Doc<'doc>> {
 580             debug!(". next_doc(exp_tag={:?})", exp_tag);
 581             if self.pos >= self.parent.end {
 582                 return Err(Expected(format!("no more documents in current node!")));
 583             }
 584             let TaggedDoc { tag: r_tag, doc: r_doc } = doc_at(self.parent.data, self.pos)?;
 585             debug!("self.parent={:?}-{:?} self.pos={:?} r_tag={:?} r_doc={:?}-{:?}",
 586                    self.parent.start,
 587                    self.parent.end,
 588                    self.pos,
 589                    r_tag,
 590                    r_doc.start,
 591                    r_doc.end);
 592             if r_tag != (exp_tag as usize) {
 593                 return Err(Expected(format!("expected EBML doc with tag {:?} but found tag {:?}",
 594                                             exp_tag,
 595                                             r_tag)));
 596             }
 597             if r_doc.end > self.parent.end {
 598                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 599                                              {:#x}",
 600                                             r_doc.end,
 601                                             self.parent.end)));
 602             }
 603             self.pos = r_doc.end;
 604             Ok(r_doc)
 605         }
 606
 607         fn push_doc<T, F>(&mut self, exp_tag: EbmlEncoderTag, f: F) -> DecodeResult<T>
 608             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 609         {
 610             let d = self.next_doc(exp_tag)?;
 611             let old_parent = self.parent;
 612             let old_pos = self.pos;
 613             self.parent = d;
 614             self.pos = d.start;
 615             let r = f(self)?;
 616             self.parent = old_parent;
 617             self.pos = old_pos;
 618             Ok(r)
 619         }
 620
 621         fn _next_sub(&mut self) -> DecodeResult<usize> {
 622             // empty vector/map optimization
 623             if self.parent.is_empty() {
 624                 return Ok(0);
 625             }
 626
 627             let TaggedDoc { tag: r_tag, doc: r_doc } = doc_at(self.parent.data, self.pos)?;
 628             let r = if r_tag == (EsSub8 as usize) {
 629                 doc_as_u8(r_doc) as usize
 630             } else if r_tag == (EsSub32 as usize) {
 631                 doc_as_u32(r_doc) as usize
 632             } else {
 633                 return Err(Expected(format!("expected EBML doc with tag {:?} or {:?} but found \
 634                                              tag {:?}",
 635                                             EsSub8,
 636                                             EsSub32,
 637                                             r_tag)));
 638             };
 639             if r_doc.end > self.parent.end {
 640                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 641                                              {:#x}",
 642                                             r_doc.end,
 643                                             self.parent.end)));
 644             }
 645             self.pos = r_doc.end;
 646             debug!("_next_sub result={:?}", r);
 647             Ok(r)
 648         }
 649
 650         // variable-length unsigned integer with different tags.
 651         // `first_tag` should be a tag for u8 or i8.
 652         // `last_tag` should be the largest allowed integer tag with the matching signedness.
 653         // all tags between them should be valid, in the order of u8, u16, u32 and u64.
 654         fn _next_int(&mut self,
 655                      first_tag: EbmlEncoderTag,
 656                      last_tag: EbmlEncoderTag)
 657                      -> DecodeResult<u64> {
 658             if self.pos >= self.parent.end {
 659                 return Err(Expected(format!("no more documents in current node!")));
 660             }
 661
 662             let TaggedDoc { tag: r_tag, doc: r_doc } = doc_at(self.parent.data, self.pos)?;
 663             let r = if first_tag as usize <= r_tag && r_tag <= last_tag as usize {
 664                 match r_tag - first_tag as usize {
 665                     0 => doc_as_u8(r_doc) as u64,
 666                     1 => doc_as_u16(r_doc) as u64,
 667                     2 => doc_as_u32(r_doc) as u64,
 668                     3 => doc_as_u64(r_doc),
 669                     _ => unreachable!(),
 670                 }
 671             } else {
 672                 return Err(Expected(format!("expected EBML doc with tag {:?} through {:?} but \
 673                                              found tag {:?}",
 674                                             first_tag,
 675                                             last_tag,
 676                                             r_tag)));
 677             };
 678             if r_doc.end > self.parent.end {
 679                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to \
 680                                              {:#x}",
 681                                             r_doc.end,
 682                                             self.parent.end)));
 683             }
 684             self.pos = r_doc.end;
 685             debug!("_next_int({:?}, {:?}) result={:?}", first_tag, last_tag, r);
 686             Ok(r)
 687         }
 688
 689         pub fn read_opaque<R, F>(&mut self, op: F) -> DecodeResult<R>
 690             where F: FnOnce(&mut opaque::Decoder, Doc) -> DecodeResult<R>
 691         {
 692             let doc = self.next_doc(EsOpaque)?;
 693
 694             let result = {
 695                 let mut opaque_decoder = opaque::Decoder::new(doc.data, doc.start);
 696                 op(&mut opaque_decoder, doc)?
 697             };
 698
 699             Ok(result)
 700         }
 701
 702         pub fn position(&self) -> usize {
 703             self.pos
 704         }
 705
 706         pub fn advance(&mut self, bytes: usize) {
 707             self.pos += bytes;
 708         }
 709     }
 710
 711     impl<'doc> serialize::Decoder for Decoder<'doc> {
 712         type Error = Error;
        /// Reads nothing and always succeeds.
        fn read_nil(&mut self) -> DecodeResult<()> {
            Ok(())
        }
 716
        /// Reads an unsigned integer stored under any tag from `EsU8` to `EsU64`.
        fn read_u64(&mut self) -> DecodeResult<u64> {
            self._next_int(EsU8, EsU64)
        }
        /// Reads an unsigned integer stored under any tag from `EsU8` to `EsU32`.
        fn read_u32(&mut self) -> DecodeResult<u32> {
            Ok(self._next_int(EsU8, EsU32)? as u32)
        }
        /// Reads an unsigned integer stored under the `EsU8` or `EsU16` tag.
        fn read_u16(&mut self) -> DecodeResult<u16> {
            Ok(self._next_int(EsU8, EsU16)? as u16)
        }
        /// Reads the next child, which must be tagged `EsU8`, as a `u8`.
        fn read_u8(&mut self) -> DecodeResult<u8> {
            Ok(doc_as_u8(self.next_doc(EsU8)?))
        }
 729         fn read_uint(&mut self) -> DecodeResult<usize> {
 730             let v = self._next_int(EsU8, EsU64)?;
 731             if v > (::std::usize::MAX as u64) {
 732                 Err(IntTooBig(v as usize))
 733             } else {
 734                 Ok(v as usize)
 735             }
 736         }
 737
        /// Reads a signed integer stored under any tag from `EsI8` to `EsI64`.
        // NOTE(review): the raw value is cast straight to i64; narrower
        // encodings do not appear to be sign-extended here — confirm
        // against the encoder.
        fn read_i64(&mut self) -> DecodeResult<i64> {
            Ok(self._next_int(EsI8, EsI64)? as i64)
        }
        /// Reads a signed integer stored under any tag from `EsI8` to `EsI32`.
        fn read_i32(&mut self) -> DecodeResult<i32> {
            Ok(self._next_int(EsI8, EsI32)? as i32)
        }
        /// Reads a signed integer stored under the `EsI8` or `EsI16` tag.
        fn read_i16(&mut self) -> DecodeResult<i16> {
            Ok(self._next_int(EsI8, EsI16)? as i16)
        }
        /// Reads the next child, which must be tagged `EsI8`, as an `i8`.
        fn read_i8(&mut self) -> DecodeResult<i8> {
            Ok(doc_as_u8(self.next_doc(EsI8)?) as i8)
        }
        /// Reads a signed integer stored under any `EsI*` tag and converts
        /// it to `isize`, failing with `IntTooBig` when the value is outside
        /// the `isize` range.
        fn read_int(&mut self) -> DecodeResult<isize> {
            let v = self._next_int(EsI8, EsI64)? as i64;
            if v > (isize::MAX as i64) || v < (isize::MIN as i64) {
                // Kept deliberately: per the message below, removing this
                // statement was observed to cause a miscompilation.
                debug!("FIXME \\#6122: Removing this makes this function miscompile");
                Err(IntTooBig(v as usize))
            } else {
                Ok(v as isize)
            }
        }
 759
        /// Reads the next child, which must be tagged `EsBool`; any non-zero
        /// byte is treated as `true`.
        fn read_bool(&mut self) -> DecodeResult<bool> {
            Ok(doc_as_u8(self.next_doc(EsBool)?) != 0)
        }
 763
        /// Reads the next child, which must be tagged `EsF64`, and
        /// reinterprets its 64 bits as an `f64`.
        fn read_f64(&mut self) -> DecodeResult<f64> {
            let bits = doc_as_u64(self.next_doc(EsF64)?);
            // SAFETY: `u64` and `f64` have the same size and every bit
            // pattern is a valid `f64`.
            Ok(unsafe { transmute(bits) })
        }
        /// Reads the next child, which must be tagged `EsF32`, and
        /// reinterprets its 32 bits as an `f32`.
        fn read_f32(&mut self) -> DecodeResult<f32> {
            let bits = doc_as_u32(self.next_doc(EsF32)?);
            // SAFETY: `u32` and `f32` have the same size and every bit
            // pattern is a valid `f32`.
            Ok(unsafe { transmute(bits) })
        }
 772         fn read_char(&mut self) -> DecodeResult<char> {
 773             Ok(char::from_u32(doc_as_u32(self.next_doc(EsChar)?)).unwrap())
 774         }
 775         fn read_str(&mut self) -> DecodeResult<String> {
 776             Ok(self.next_doc(EsStr)?.as_str())
 777         }
 778
 779         // Compound types:
 780         fn read_enum<T, F>(&mut self, name: &str, f: F) -> DecodeResult<T>
 781             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 782         {
 783             debug!("read_enum({})", name);
 784
 785             let doc = self.next_doc(EsEnum)?;
 786
 787             let (old_parent, old_pos) = (self.parent, self.pos);
 788             self.parent = doc;
 789             self.pos = self.parent.start;
 790
 791             let result = f(self)?;
 792
 793             self.parent = old_parent;
 794             self.pos = old_pos;
 795             Ok(result)
 796         }
 797
        /// Reads the variant id from the leading `EsSub*` child and passes
        /// it to `f`. The variant names are not used.
        fn read_enum_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
            where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
        {
            debug!("read_enum_variant()");
            let idx = self._next_sub()?;
            debug!("  idx={}", idx);

            f(self, idx)
        }
 807
        /// Runs `f` to read one variant argument; `idx` is only logged,
        /// since arguments are read in stream order.
        fn read_enum_variant_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
            where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
        {
            debug!("read_enum_variant_arg(idx={})", idx);
            f(self)
        }
 814
 815         fn read_enum_struct_variant<T, F>(&mut self, _: &[&str], mut f: F) -> DecodeResult<T>
 816             where F: FnMut(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 817         {
 818             debug!("read_enum_struct_variant()");
 819             let idx = self._next_sub()?;
 820             debug!("  idx={}", idx);
 821
 822             f(self, idx)
 823         }
 824
 825         fn read_enum_struct_variant_field<T, F>(&mut self,
 826                                                 name: &str,
 827                                                 idx: usize,
 828                                                 f: F)
 829                                                 -> DecodeResult<T>
 830             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 831         {
 832             debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx);
 833             f(self)
 834         }
 835
 836         fn read_struct<T, F>(&mut self, name: &str, _: usize, f: F) -> DecodeResult<T>
 837             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 838         {
 839             debug!("read_struct(name={})", name);
 840             f(self)
 841         }
 842
 843         fn read_struct_field<T, F>(&mut self, name: &str, idx: usize, f: F) -> DecodeResult<T>
 844             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 845         {
 846             debug!("read_struct_field(name={}, idx={})", name, idx);
 847             f(self)
 848         }
 849
 850         fn read_tuple<T, F>(&mut self, tuple_len: usize, f: F) -> DecodeResult<T>
 851             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 852         {
 853             debug!("read_tuple()");
 854             self.read_seq(move |d, len| {
 855                 if len == tuple_len {
 856                     f(d)
 857                 } else {
 858                     Err(Expected(format!("Expected tuple of length `{}`, found tuple of length \
 859                                           `{}`",
 860                                          tuple_len,
 861                                          len)))
 862                 }
 863             })
 864         }
 865
 866         fn read_tuple_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 867             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 868         {
 869             debug!("read_tuple_arg(idx={})", idx);
 870             self.read_seq_elt(idx, f)
 871         }
 872
 873         fn read_tuple_struct<T, F>(&mut self, name: &str, len: usize, f: F) -> DecodeResult<T>
 874             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 875         {
 876             debug!("read_tuple_struct(name={})", name);
 877             self.read_tuple(len, f)
 878         }
 879
 880         fn read_tuple_struct_arg<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 881             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 882         {
 883             debug!("read_tuple_struct_arg(idx={})", idx);
 884             self.read_tuple_arg(idx, f)
 885         }
 886
 887         fn read_option<T, F>(&mut self, mut f: F) -> DecodeResult<T>
 888             where F: FnMut(&mut Decoder<'doc>, bool) -> DecodeResult<T>
 889         {
 890             debug!("read_option()");
 891             self.read_enum("Option", move |this| {
 892                 this.read_enum_variant(&["None", "Some"], move |this, idx| {
 893                     match idx {
 894                         0 => f(this, false),
 895                         1 => f(this, true),
 896                         _ => Err(Expected(format!("Expected None or Some"))),
 897                     }
 898                 })
 899             })
 900         }
 901
 902         fn read_seq<T, F>(&mut self, f: F) -> DecodeResult<T>
 903             where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 904         {
 905             debug!("read_seq()");
 906             self.push_doc(EsVec, move |d| {
 907                 let len = d._next_sub()?;
 908                 debug!("  len={}", len);
 909                 f(d, len)
 910             })
 911         }
 912
 913         fn read_seq_elt<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 914             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 915         {
 916             debug!("read_seq_elt(idx={})", idx);
 917             self.push_doc(EsVecElt, f)
 918         }
 919
 920         fn read_map<T, F>(&mut self, f: F) -> DecodeResult<T>
 921             where F: FnOnce(&mut Decoder<'doc>, usize) -> DecodeResult<T>
 922         {
 923             debug!("read_map()");
 924             self.push_doc(EsMap, move |d| {
 925                 let len = d._next_sub()?;
 926                 debug!("  len={}", len);
 927                 f(d, len)
 928             })
 929         }
 930
 931         fn read_map_elt_key<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 932             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 933         {
 934             debug!("read_map_elt_key(idx={})", idx);
 935             self.push_doc(EsMapKey, f)
 936         }
 937
 938         fn read_map_elt_val<T, F>(&mut self, idx: usize, f: F) -> DecodeResult<T>
 939             where F: FnOnce(&mut Decoder<'doc>) -> DecodeResult<T>
 940         {
 941             debug!("read_map_elt_val(idx={})", idx);
 942             self.push_doc(EsMapVal, f)
 943         }
 944
 945         fn error(&mut self, err: &str) -> Error {
 946             ApplicationError(err.to_string())
 947         }
 948     }
 949 }
 950
 951 pub mod writer {
 952     use std::mem;
 953     use std::io::prelude::*;
 954     use std::io::{self, SeekFrom, Cursor};
 955
 956     use super::opaque;
 957     use super::{EsVec, EsMap, EsEnum, EsSub8, EsSub32, EsVecElt, EsMapKey, EsU64, EsU32, EsU16,
 958                 EsU8, EsI64, EsI32, EsI16, EsI8, EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal,
 959                 EsOpaque, NUM_IMPLICIT_TAGS, NUM_TAGS};
 960
 961     use serialize;
 962
 963
    /// Result of every writer operation: `Ok(())` on success, otherwise the
    /// underlying `io::Error`.
    pub type EncodeResult = io::Result<()>;

    // rbml writing
    /// Encoder that writes RBML documents into an in-memory cursor.
    pub struct Encoder<'a> {
        /// Destination buffer that all tags and payloads are written to.
        pub writer: &'a mut Cursor<Vec<u8>>,
        /// Positions of the four-byte size placeholders of the tags that are
        /// currently open: pushed by `start_tag`, popped by `end_tag`.
        size_positions: Vec<u64>,
        relax_limit: u64, // do not move encoded bytes before this position
    }
 972
 973     fn write_tag<W: Write>(w: &mut W, n: usize) -> EncodeResult {
 974         if n < 0xf0 {
 975             w.write_all(&[n as u8])
 976         } else if 0x100 <= n && n < NUM_TAGS {
 977             w.write_all(&[0xf0 | (n >> 8) as u8, n as u8])
 978         } else {
 979             Err(io::Error::new(io::ErrorKind::Other, &format!("invalid tag: {}", n)[..]))
 980         }
 981     }
 982
 983     fn write_sized_vuint<W: Write>(w: &mut W, n: usize, size: usize) -> EncodeResult {
 984         match size {
 985             1 => w.write_all(&[0x80 | (n as u8)]),
 986             2 => w.write_all(&[0x40 | ((n >> 8) as u8), n as u8]),
 987             3 => w.write_all(&[0x20 | ((n >> 16) as u8), (n >> 8) as u8, n as u8]),
 988             4 => w.write_all(&[0x10 | ((n >> 24) as u8), (n >> 16) as u8, (n >> 8) as u8, n as u8]),
 989             _ => Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..])),
 990         }
 991     }
 992
 993     pub fn write_vuint<W: Write>(w: &mut W, n: usize) -> EncodeResult {
 994         if n < 0x7f {
 995             return write_sized_vuint(w, n, 1);
 996         }
 997         if n < 0x4000 {
 998             return write_sized_vuint(w, n, 2);
 999         }
1000         if n < 0x200000 {
1001             return write_sized_vuint(w, n, 3);
1002         }
1003         if n < 0x10000000 {
1004             return write_sized_vuint(w, n, 4);
1005         }
1006         Err(io::Error::new(io::ErrorKind::Other, &format!("isize too big: {}", n)[..]))
1007     }
1008
1009     impl<'a> Encoder<'a> {
1010         pub fn new(w: &'a mut Cursor<Vec<u8>>) -> Encoder<'a> {
1011             Encoder {
1012                 writer: w,
1013                 size_positions: vec![],
1014                 relax_limit: 0,
1015             }
1016         }
1017
1018         pub fn start_tag(&mut self, tag_id: usize) -> EncodeResult {
1019             debug!("Start tag {:?}", tag_id);
1020             assert!(tag_id >= NUM_IMPLICIT_TAGS);
1021
1022             // Write the enum ID:
1023             write_tag(self.writer, tag_id)?;
1024
1025             // Write a placeholder four-byte size.
1026             let cur_pos = self.writer.seek(SeekFrom::Current(0))?;
1027             self.size_positions.push(cur_pos);
1028             let zeroes: &[u8] = &[0, 0, 0, 0];
1029             self.writer.write_all(zeroes)
1030         }
1031
1032         pub fn end_tag(&mut self) -> EncodeResult {
1033             let last_size_pos = self.size_positions.pop().unwrap();
1034             let cur_pos = self.writer.seek(SeekFrom::Current(0))?;
1035             self.writer.seek(SeekFrom::Start(last_size_pos))?;
1036             let size = (cur_pos - last_size_pos - 4) as usize;
1037
1038             // relax the size encoding for small tags (bigger tags are costly to move).
1039             // we should never try to move the stable positions, however.
1040             const RELAX_MAX_SIZE: usize = 0x100;
1041             if size <= RELAX_MAX_SIZE && last_size_pos >= self.relax_limit {
1042                 // we can't alter the buffer in place, so have a temporary buffer
1043                 let mut buf = [0u8; RELAX_MAX_SIZE];
1044                 {
1045                     let last_size_pos = last_size_pos as usize;
1046                     let data = &self.writer.get_ref()[last_size_pos + 4..cur_pos as usize];
1047                     buf[..size].copy_from_slice(data);
1048                 }
1049
1050                 // overwrite the size and data and continue
1051                 write_vuint(self.writer, size)?;
1052                 self.writer.write_all(&buf[..size])?;
1053             } else {
1054                 // overwrite the size with an overlong encoding and skip past the data
1055                 write_sized_vuint(self.writer, size, 4)?;
1056                 self.writer.seek(SeekFrom::Start(cur_pos))?;
1057             }
1058
1059             debug!("End tag (size = {:?})", size);
1060             Ok(())
1061         }
1062
1063         pub fn wr_tag<F>(&mut self, tag_id: usize, blk: F) -> EncodeResult
1064             where F: FnOnce() -> EncodeResult
1065         {
1066             self.start_tag(tag_id)?;
1067             blk()?;
1068             self.end_tag()
1069         }
1070
1071         pub fn wr_tagged_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
1072             assert!(tag_id >= NUM_IMPLICIT_TAGS);
1073             write_tag(self.writer, tag_id)?;
1074             write_vuint(self.writer, b.len())?;
1075             self.writer.write_all(b)
1076         }
1077
1078         pub fn wr_tagged_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
1079             let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
1080             // tagged integers are emitted in big-endian, with no
1081             // leading zeros.
1082             let leading_zero_bytes = v.leading_zeros() / 8;
1083             self.wr_tagged_bytes(tag_id, &bytes[leading_zero_bytes as usize..])
1084         }
1085
1086         #[inline]
1087         pub fn wr_tagged_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1088             self.wr_tagged_u64(tag_id, v as u64)
1089         }
1090
1091         #[inline]
1092         pub fn wr_tagged_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1093             self.wr_tagged_u64(tag_id, v as u64)
1094         }
1095
1096         #[inline]
1097         pub fn wr_tagged_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
1098             self.wr_tagged_bytes(tag_id, &[v])
1099         }
1100
1101         #[inline]
1102         pub fn wr_tagged_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
1103             self.wr_tagged_u64(tag_id, v as u64)
1104         }
1105
1106         #[inline]
1107         pub fn wr_tagged_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
1108             self.wr_tagged_u32(tag_id, v as u32)
1109         }
1110
1111         #[inline]
1112         pub fn wr_tagged_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
1113             self.wr_tagged_u16(tag_id, v as u16)
1114         }
1115
1116         #[inline]
1117         pub fn wr_tagged_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
1118             self.wr_tagged_bytes(tag_id, &[v as u8])
1119         }
1120
1121         pub fn wr_tagged_str(&mut self, tag_id: usize, v: &str) -> EncodeResult {
1122             self.wr_tagged_bytes(tag_id, v.as_bytes())
1123         }
1124
1125         // for auto-serialization
1126         fn wr_tagged_raw_bytes(&mut self, tag_id: usize, b: &[u8]) -> EncodeResult {
1127             write_tag(self.writer, tag_id)?;
1128             self.writer.write_all(b)
1129         }
1130
1131         fn wr_tagged_raw_u64(&mut self, tag_id: usize, v: u64) -> EncodeResult {
1132             let bytes: [u8; 8] = unsafe { mem::transmute(v.to_be()) };
1133             self.wr_tagged_raw_bytes(tag_id, &bytes)
1134         }
1135
1136         fn wr_tagged_raw_u32(&mut self, tag_id: usize, v: u32) -> EncodeResult {
1137             let bytes: [u8; 4] = unsafe { mem::transmute(v.to_be()) };
1138             self.wr_tagged_raw_bytes(tag_id, &bytes)
1139         }
1140
1141         fn wr_tagged_raw_u16(&mut self, tag_id: usize, v: u16) -> EncodeResult {
1142             let bytes: [u8; 2] = unsafe { mem::transmute(v.to_be()) };
1143             self.wr_tagged_raw_bytes(tag_id, &bytes)
1144         }
1145
1146         fn wr_tagged_raw_u8(&mut self, tag_id: usize, v: u8) -> EncodeResult {
1147             self.wr_tagged_raw_bytes(tag_id, &[v])
1148         }
1149
1150         fn wr_tagged_raw_i64(&mut self, tag_id: usize, v: i64) -> EncodeResult {
1151             self.wr_tagged_raw_u64(tag_id, v as u64)
1152         }
1153
1154         fn wr_tagged_raw_i32(&mut self, tag_id: usize, v: i32) -> EncodeResult {
1155             self.wr_tagged_raw_u32(tag_id, v as u32)
1156         }
1157
1158         fn wr_tagged_raw_i16(&mut self, tag_id: usize, v: i16) -> EncodeResult {
1159             self.wr_tagged_raw_u16(tag_id, v as u16)
1160         }
1161
1162         fn wr_tagged_raw_i8(&mut self, tag_id: usize, v: i8) -> EncodeResult {
1163             self.wr_tagged_raw_bytes(tag_id, &[v as u8])
1164         }
1165
1166         pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult {
1167             debug!("Write {:?} bytes", b.len());
1168             self.writer.write_all(b)
1169         }
1170
1171         pub fn wr_str(&mut self, s: &str) -> EncodeResult {
1172             debug!("Write str: {:?}", s);
1173             self.writer.write_all(s.as_bytes())
1174         }
1175
1176         /// Returns the current position while marking it stable, i.e.
1177         /// generated bytes so far wouldn't be affected by relaxation.
1178         pub fn mark_stable_position(&mut self) -> u64 {
1179             let pos = self.writer.seek(SeekFrom::Current(0)).unwrap();
1180             if self.relax_limit < pos {
1181                 self.relax_limit = pos;
1182             }
1183             pos
1184         }
1185     }
1186
1187     impl<'a> Encoder<'a> {
1188         // used internally to emit things like the vector length and so on
1189         fn _emit_tagged_sub(&mut self, v: usize) -> EncodeResult {
1190             if v as u8 as usize == v {
1191                 self.wr_tagged_raw_u8(EsSub8 as usize, v as u8)
1192             } else if v as u32 as usize == v {
1193                 self.wr_tagged_raw_u32(EsSub32 as usize, v as u32)
1194             } else {
1195                 Err(io::Error::new(io::ErrorKind::Other,
1196                                    &format!("length or variant id too big: {}", v)[..]))
1197             }
1198         }
1199
1200         pub fn emit_opaque<F>(&mut self, f: F) -> EncodeResult
1201             where F: FnOnce(&mut opaque::Encoder) -> EncodeResult
1202         {
1203             self.start_tag(EsOpaque as usize)?;
1204
1205             {
1206                 let mut opaque_encoder = opaque::Encoder::new(self.writer);
1207                 f(&mut opaque_encoder)?;
1208             }
1209
1210             self.mark_stable_position();
1211             self.end_tag()
1212         }
1213     }
1214
1215     impl<'a> serialize::Encoder for Encoder<'a> {
1216         type Error = io::Error;
1217
1218         fn emit_nil(&mut self) -> EncodeResult {
1219             Ok(())
1220         }
1221
1222         fn emit_uint(&mut self, v: usize) -> EncodeResult {
1223             self.emit_u64(v as u64)
1224         }
1225         fn emit_u64(&mut self, v: u64) -> EncodeResult {
1226             if v as u32 as u64 == v {
1227                 self.emit_u32(v as u32)
1228             } else {
1229                 self.wr_tagged_raw_u64(EsU64 as usize, v)
1230             }
1231         }
1232         fn emit_u32(&mut self, v: u32) -> EncodeResult {
1233             if v as u16 as u32 == v {
1234                 self.emit_u16(v as u16)
1235             } else {
1236                 self.wr_tagged_raw_u32(EsU32 as usize, v)
1237             }
1238         }
1239         fn emit_u16(&mut self, v: u16) -> EncodeResult {
1240             if v as u8 as u16 == v {
1241                 self.emit_u8(v as u8)
1242             } else {
1243                 self.wr_tagged_raw_u16(EsU16 as usize, v)
1244             }
1245         }
1246         fn emit_u8(&mut self, v: u8) -> EncodeResult {
1247             self.wr_tagged_raw_u8(EsU8 as usize, v)
1248         }
1249
1250         fn emit_int(&mut self, v: isize) -> EncodeResult {
1251             self.emit_i64(v as i64)
1252         }
1253         fn emit_i64(&mut self, v: i64) -> EncodeResult {
1254             if v as i32 as i64 == v {
1255                 self.emit_i32(v as i32)
1256             } else {
1257                 self.wr_tagged_raw_i64(EsI64 as usize, v)
1258             }
1259         }
1260         fn emit_i32(&mut self, v: i32) -> EncodeResult {
1261             if v as i16 as i32 == v {
1262                 self.emit_i16(v as i16)
1263             } else {
1264                 self.wr_tagged_raw_i32(EsI32 as usize, v)
1265             }
1266         }
1267         fn emit_i16(&mut self, v: i16) -> EncodeResult {
1268             if v as i8 as i16 == v {
1269                 self.emit_i8(v as i8)
1270             } else {
1271                 self.wr_tagged_raw_i16(EsI16 as usize, v)
1272             }
1273         }
1274         fn emit_i8(&mut self, v: i8) -> EncodeResult {
1275             self.wr_tagged_raw_i8(EsI8 as usize, v)
1276         }
1277
1278         fn emit_bool(&mut self, v: bool) -> EncodeResult {
1279             self.wr_tagged_raw_u8(EsBool as usize, v as u8)
1280         }
1281
1282         fn emit_f64(&mut self, v: f64) -> EncodeResult {
1283             let bits = unsafe { mem::transmute(v) };
1284             self.wr_tagged_raw_u64(EsF64 as usize, bits)
1285         }
1286         fn emit_f32(&mut self, v: f32) -> EncodeResult {
1287             let bits = unsafe { mem::transmute(v) };
1288             self.wr_tagged_raw_u32(EsF32 as usize, bits)
1289         }
1290         fn emit_char(&mut self, v: char) -> EncodeResult {
1291             self.wr_tagged_raw_u32(EsChar as usize, v as u32)
1292         }
1293
1294         fn emit_str(&mut self, v: &str) -> EncodeResult {
1295             self.wr_tagged_str(EsStr as usize, v)
1296         }
1297
1298         fn emit_enum<F>(&mut self, _name: &str, f: F) -> EncodeResult
1299             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1300         {
1301             self.start_tag(EsEnum as usize)?;
1302             f(self)?;
1303             self.end_tag()
1304         }
1305
1306         fn emit_enum_variant<F>(&mut self, _: &str, v_id: usize, _: usize, f: F) -> EncodeResult
1307             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1308         {
1309             self._emit_tagged_sub(v_id)?;
1310             f(self)
1311         }
1312
1313         fn emit_enum_variant_arg<F>(&mut self, _: usize, f: F) -> EncodeResult
1314             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1315         {
1316             f(self)
1317         }
1318
1319         fn emit_enum_struct_variant<F>(&mut self,
1320                                        v_name: &str,
1321                                        v_id: usize,
1322                                        cnt: usize,
1323                                        f: F)
1324                                        -> EncodeResult
1325             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1326         {
1327             self.emit_enum_variant(v_name, v_id, cnt, f)
1328         }
1329
1330         fn emit_enum_struct_variant_field<F>(&mut self, _: &str, idx: usize, f: F) -> EncodeResult
1331             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1332         {
1333             self.emit_enum_variant_arg(idx, f)
1334         }
1335
1336         fn emit_struct<F>(&mut self, _: &str, _len: usize, f: F) -> EncodeResult
1337             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1338         {
1339             f(self)
1340         }
1341
1342         fn emit_struct_field<F>(&mut self, _name: &str, _: usize, f: F) -> EncodeResult
1343             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1344         {
1345             f(self)
1346         }
1347
1348         fn emit_tuple<F>(&mut self, len: usize, f: F) -> EncodeResult
1349             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1350         {
1351             self.emit_seq(len, f)
1352         }
1353         fn emit_tuple_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
1354             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1355         {
1356             self.emit_seq_elt(idx, f)
1357         }
1358
1359         fn emit_tuple_struct<F>(&mut self, _: &str, len: usize, f: F) -> EncodeResult
1360             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1361         {
1362             self.emit_seq(len, f)
1363         }
1364         fn emit_tuple_struct_arg<F>(&mut self, idx: usize, f: F) -> EncodeResult
1365             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1366         {
1367             self.emit_seq_elt(idx, f)
1368         }
1369
1370         fn emit_option<F>(&mut self, f: F) -> EncodeResult
1371             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1372         {
1373             self.emit_enum("Option", f)
1374         }
1375         fn emit_option_none(&mut self) -> EncodeResult {
1376             self.emit_enum_variant("None", 0, 0, |_| Ok(()))
1377         }
1378         fn emit_option_some<F>(&mut self, f: F) -> EncodeResult
1379             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1380         {
1381
1382             self.emit_enum_variant("Some", 1, 1, f)
1383         }
1384
1385         fn emit_seq<F>(&mut self, len: usize, f: F) -> EncodeResult
1386             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1387         {
1388             if len == 0 {
1389                 // empty vector optimization
1390                 return self.wr_tagged_bytes(EsVec as usize, &[]);
1391             }
1392
1393             self.start_tag(EsVec as usize)?;
1394             self._emit_tagged_sub(len)?;
1395             f(self)?;
1396             self.end_tag()
1397         }
1398
1399         fn emit_seq_elt<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1400             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1401         {
1402
1403             self.start_tag(EsVecElt as usize)?;
1404             f(self)?;
1405             self.end_tag()
1406         }
1407
1408         fn emit_map<F>(&mut self, len: usize, f: F) -> EncodeResult
1409             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1410         {
1411             if len == 0 {
1412                 // empty map optimization
1413                 return self.wr_tagged_bytes(EsMap as usize, &[]);
1414             }
1415
1416             self.start_tag(EsMap as usize)?;
1417             self._emit_tagged_sub(len)?;
1418             f(self)?;
1419             self.end_tag()
1420         }
1421
1422         fn emit_map_elt_key<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1423             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1424         {
1425
1426             self.start_tag(EsMapKey as usize)?;
1427             f(self)?;
1428             self.end_tag()
1429         }
1430
1431         fn emit_map_elt_val<F>(&mut self, _idx: usize, f: F) -> EncodeResult
1432             where F: FnOnce(&mut Encoder<'a>) -> EncodeResult
1433         {
1434             self.start_tag(EsMapVal as usize)?;
1435             f(self)?;
1436             self.end_tag()
1437         }
1438     }
1439 }
1440
1441 // ___________________________________________________________________________
1442 // Testing
1443
#[cfg(test)]
mod tests {
    use super::{Doc, reader, writer};

    use serialize::{Encodable, Decodable};

    use std::io::Cursor;

    /// Decodes the smallest and largest encoding of each of the four
    /// variable-length integer widths, walking the buffer front to back.
    #[test]
    fn test_vuint_at() {
        let data = &[
            0x80,
            0xff,
            0x40, 0x00,
            0x7f, 0xff,
            0x20, 0x00, 0x00,
            0x3f, 0xff, 0xff,
            0x10, 0x00, 0x00, 0x00,
            0x1f, 0xff, 0xff, 0xff
        ];

        // (expected value, expected offset of the next encoding)
        let expected = [
            // Class A
            (0, 1),
            ((1 << 7) - 1, 2),
            // Class B
            (0, 4),
            ((1 << 14) - 1, 6),
            // Class C
            (0, 9),
            ((1 << 21) - 1, 12),
            // Class D
            (0, 16),
            ((1 << 28) - 1, 20),
        ];

        let mut pos = 0;
        for &(val, next) in expected.iter() {
            let res: reader::Res = reader::vuint_at(data, pos).unwrap();
            assert_eq!(res.val, val);
            assert_eq!(res.next, next);
            pos = res.next;
        }
    }

    /// Round-trips a few `Option<isize>` values through the writer and reader.
    #[test]
    fn test_option_int() {
        fn test_v(v: Option<isize>) {
            debug!("v == {:?}", v);

            // Encode into a fresh in-memory buffer; the encoder's borrow of
            // the buffer ends with the inner scope.
            let mut buf = Cursor::new(Vec::new());
            {
                let mut encoder = writer::Encoder::new(&mut buf);
                let _ = v.encode(&mut encoder);
            }

            // Decode it back and compare.
            let doc = Doc::new(buf.get_ref());
            let mut decoder = reader::Decoder::new(doc);
            let v1 = Decodable::decode(&mut decoder).unwrap();
            debug!("v1 == {:?}", v1);
            assert_eq!(v, v1);
        }

        for &case in [Some(22), None, Some(3)].iter() {
            test_v(case);
        }
    }
}
1521
#[cfg(test)]
mod bench {
    #![allow(non_snake_case)]
    use test::Bencher;
    use super::reader;

    /// One-byte encodings (`0x80`) read at offsets 0, 4, 8, ...
    #[bench]
    pub fn vuint_at_A_aligned(b: &mut Bencher) {
        let data = (0..4 * 100)
                       .map(|i| if i % 2 == 0 { 0x80 } else { i as u8 })
                       .collect::<Vec<_>>();
        let mut total = 0;
        b.iter(|| {
            let mut pos = 0;
            while pos < data.len() {
                let decoded = reader::vuint_at(&data, pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }

    /// One-byte encodings (`0x80`) read at offsets 1, 5, 9, ...
    #[bench]
    pub fn vuint_at_A_unaligned(b: &mut Bencher) {
        let data = (0..4 * 100 + 1)
                       .map(|i| if i % 2 == 1 { 0x80 } else { i as u8 })
                       .collect::<Vec<_>>();
        let mut total = 0;
        b.iter(|| {
            let mut pos = 1;
            while pos < data.len() {
                let decoded = reader::vuint_at(&data, pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }

    /// Four-byte encodings (leading `0x10`) read at offsets 0, 4, 8, ...
    #[bench]
    pub fn vuint_at_D_aligned(b: &mut Bencher) {
        let data = (0..4 * 100)
                       .map(|i| {
                           let slot = i % 4;
                           if slot == 0 {
                               0x10
                           } else if slot == 3 {
                               i as u8
                           } else {
                               0
                           }
                       })
                       .collect::<Vec<_>>();
        let mut total = 0;
        b.iter(|| {
            let mut pos = 0;
            while pos < data.len() {
                let decoded = reader::vuint_at(&data, pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }

    /// Four-byte encodings (leading `0x10`) read at offsets 1, 5, 9, ...
    #[bench]
    pub fn vuint_at_D_unaligned(b: &mut Bencher) {
        let data = (0..4 * 100 + 1)
                       .map(|i| {
                           let slot = i % 4;
                           if slot == 1 {
                               0x10
                           } else if slot == 0 {
                               i as u8
                           } else {
                               0
                           }
                       })
                       .collect::<Vec<_>>();
        let mut total = 0;
        b.iter(|| {
            let mut pos = 1;
            while pos < data.len() {
                let decoded = reader::vuint_at(&data, pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }
}