src/libserialize/ebml.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 #![allow(missing_doc)]
  12
  13 use std::io;
  14 use std::str;
  15
  16 // Simple Extensible Binary Meta Language (EBML) reader and writer on a
  17 // cursor model. See the specification here:
  18 //     http://www.matroska.org/technical/specs/rfc/index.html
  19
  20 // Common data structures
  21 #[deriving(Clone)]
     /// A window onto a byte buffer holding EBML data.
     ///
     /// `data` is the buffer the offsets refer to; `start` and `end` delimit the
     /// bytes belonging to this document (the methods on `Doc` read
     /// `data.slice(start, end)`).
  22 pub struct Doc<'a> {
         /// Buffer that `start` and `end` index into.
  23     pub data: &'a [u8],
         /// Offset of the first byte of this document within `data`.
  24     pub start: uint,
         /// Offset just past the last byte of this document within `data`.
  25     pub end: uint,
  26 }
  27
  28 impl<'doc> Doc<'doc> {
  29     pub fn get<'a>(&'a self, tag: uint) -> Doc<'a> {
  30         reader::get_doc(*self, tag)
  31     }
  32
  33     pub fn as_str_slice<'a>(&'a self) -> &'a str {
  34         str::from_utf8(self.data.slice(self.start, self.end)).unwrap()
  35     }
  36
  37     pub fn as_str(&self) -> String {
  38         self.as_str_slice().to_string()
  39     }
  40 }
  41
  42 pub struct TaggedDoc<'a> {
         // Pairs a document with the numeric tag that was read in front of it
         // (this is what `reader::doc_at` produces).
         /// Tag value decoded immediately before the document's size.
  43     tag: uint,
         /// Location of the document's contents inside the buffer.
  44     pub doc: Doc<'a>,
  45 }
  46
  47 #[deriving(Show)]
     /// Tags used by the encoder and decoder in this file to mark encoded values.
     ///
     /// A variant is converted with `as uint` to obtain the tag that is written
     /// to, or compared against, the data, so the numeric values noted beside
     /// the variants (their declaration order) are part of the encoding.
  48 pub enum EbmlEncoderTag {
  49     EsUint,     // 0
  50     EsU64,      // 1
  51     EsU32,      // 2
  52     EsU16,      // 3
  53     EsU8,       // 4
  54     EsInt,      // 5
  55     EsI64,      // 6
  56     EsI32,      // 7
  57     EsI16,      // 8
  58     EsI8,       // 9
  59     EsBool,     // 10
  60     EsChar,     // 11
  61     EsStr,      // 12
  62     EsF64,      // 13
  63     EsF32,      // 14
  64     EsFloat,    // 15
  65     EsEnum,     // 16
  66     EsEnumVid,  // 17
  67     EsEnumBody, // 18
  68     EsVec,      // 19
  69     EsVecLen,   // 20
  70     EsVecElt,   // 21
  71     EsMap,      // 22
  72     EsMapLen,   // 23
  73     EsMapKey,   // 24
  74     EsMapVal,   // 25
  75
  76     EsOpaque,   // 26
  77
  78     EsLabel, // 27; used only when debugging
  79 }
  80
  81 #[deriving(Show)]
     /// Errors reported by the EBML reader.
  82 pub enum Error {
         /// A value did not fit the expected integer width. Carries the offending
         /// value, or (from `vuint_at_slow`) the leading byte that matched none of
         /// the supported length markers.
  83     IntTooBig(uint),
         /// The data did not contain what the decoder expected; carries a
         /// human-readable description.
  84     Expected(String),
         /// Wraps an I/O error.
         /// NOTE(review): nothing in the `reader` module constructs this variant;
         /// confirm where it is produced.
  85     IoError(io::IoError)
  86 }
  87 // --------------------------------------
  88
  89 pub mod reader {
  90     use std::char;
  91
  92     use std::mem::transmute;
  93     use std::int;
  94     use std::option::{None, Option, Some};
  95     use std::io::extensions::u64_from_be_bytes;
  96
  97     use serialize;
  98
  99     use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey,
 100         EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8,
 101         EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint,
 102         EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig,
 103         Expected };
 104
 105     pub type DecodeResult<T> = Result<T, Error>; // outcome of a reader operation: a value or an ebml `Error`
 106     // ebml reading
 107
 108     macro_rules! try_or(
             // try_or!(e, r): evaluates to the value inside `Ok`. On `Err` the error
             // is logged with `debug!` and the enclosing function returns `r`.
 109         ($e:expr, $r:expr) => (
 110             match $e {
 111                 Ok(e) => e,
 112                 Err(e) => {
 113                     debug!("ignored error: {}", e);
 114                     return $r
 115                 }
 116             }
 117         )
 118     )
 119
 120     pub struct Res {
             // Result of decoding one variable-length unsigned integer.
             /// The decoded value.
 121         pub val: uint,
             /// Offset of the first byte after the encoded integer.
 122         pub next: uint
 123     }
 124
 125     #[inline(never)]
 126     fn vuint_at_slow(data: &[u8], start: uint) -> DecodeResult<Res> {
 127         let a = data[start];
 128         if a & 0x80u8 != 0u8 {
 129             return Ok(Res {val: (a & 0x7fu8) as uint, next: start + 1u});
 130         }
 131         if a & 0x40u8 != 0u8 {
 132             return Ok(Res {val: ((a & 0x3fu8) as uint) << 8u |
 133                         (data[start + 1u] as uint),
 134                     next: start + 2u});
 135         }
 136         if a & 0x20u8 != 0u8 {
 137             return Ok(Res {val: ((a & 0x1fu8) as uint) << 16u |
 138                         (data[start + 1u] as uint) << 8u |
 139                         (data[start + 2u] as uint),
 140                     next: start + 3u});
 141         }
 142         if a & 0x10u8 != 0u8 {
 143             return Ok(Res {val: ((a & 0x0fu8) as uint) << 24u |
 144                         (data[start + 1u] as uint) << 16u |
 145                         (data[start + 2u] as uint) << 8u |
 146                         (data[start + 3u] as uint),
 147                     next: start + 4u});
 148         }
 149         Err(IntTooBig(a as uint))
 150     }
 151
 152     pub fn vuint_at(data: &[u8], start: uint) -> DecodeResult<Res> {
 153         use std::mem::from_be32;
 154
 155         if data.len() - start < 4 {
 156             return vuint_at_slow(data, start);
 157         }
 158
 159         // Lookup table for parsing EBML Element IDs as per http://ebml.sourceforge.net/specs/
 160         // The Element IDs are parsed by reading a big endian u32 positioned at data[start].
 161         // Using the four most significant bits of the u32 we lookup in the table below how the
 162         // element ID should be derived from it.
 163         //
 164         // The table stores tuples (shift, mask) where shift is the number the u32 should be right
 165         // shifted with and mask is the value the right shifted value should be masked with.
 166         // If for example the most significant bit is set this means it's a class A ID and the u32
 167         // should be right shifted with 24 and masked with 0x7f. Therefore we store (24, 0x7f) at
 168         // index 0x8 - 0xF (four bit numbers where the most significant bit is set).
 169         //
 170         // By storing the number of shifts and masks in a table instead of checking in order if
 171         // the most significant bit is set, the second most significant bit is set etc. we can
 172         // replace up to three "and+branch" with a single table lookup which gives us a measured
 173         // speedup of around 2x on x86_64.
 174         static SHIFT_MASK_TABLE: [(u32, u32), ..16] = [
 175             (0, 0x0), (0, 0x0fffffff),
 176             (8, 0x1fffff), (8, 0x1fffff),
 177             (16, 0x3fff), (16, 0x3fff), (16, 0x3fff), (16, 0x3fff),
 178             (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f),
 179             (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f)
 180         ];
 181
 182         unsafe {
 183             let ptr = data.as_ptr().offset(start as int) as *u32;
 184             let val = from_be32(*ptr);
 185
 186             let i = (val >> 28u) as uint;
 187             let (shift, mask) = SHIFT_MASK_TABLE[i];
 188             Ok(Res {
 189                 val: ((val >> shift) & mask) as uint,
 190                 next: start + (((32 - shift) >> 3) as uint)
 191             })
 192         }
 193     }
 194
 195     pub fn Doc<'a>(data: &'a [u8]) -> Doc<'a> {
 196         Doc { data: data, start: 0u, end: data.len() }
 197     }
 198
 199     pub fn doc_at<'a>(data: &'a [u8], start: uint) -> DecodeResult<TaggedDoc<'a>> {
 200         let elt_tag = try!(vuint_at(data, start));
 201         let elt_size = try!(vuint_at(data, elt_tag.next));
 202         let end = elt_size.next + elt_size.val;
 203         Ok(TaggedDoc {
 204             tag: elt_tag.val,
 205             doc: Doc { data: data, start: elt_size.next, end: end }
 206         })
 207     }
 208
 209     pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: uint) -> Option<Doc<'a>> {
 210         let mut pos = d.start;
 211         while pos < d.end {
 212             let elt_tag = try_or!(vuint_at(d.data, pos), None);
 213             let elt_size = try_or!(vuint_at(d.data, elt_tag.next), None);
 214             pos = elt_size.next + elt_size.val;
 215             if elt_tag.val == tg {
 216                 return Some(Doc { data: d.data, start: elt_size.next,
 217                                   end: pos });
 218             }
 219         }
 220         None
 221     }
 222
 223     pub fn get_doc<'a>(d: Doc<'a>, tg: uint) -> Doc<'a> {
 224         match maybe_get_doc(d, tg) {
 225             Some(d) => d,
 226             None => {
 227                 error!("failed to find block with tag {}", tg);
 228                 fail!();
 229             }
 230         }
 231     }
 232
 233     pub fn docs<'a>(d: Doc<'a>, it: |uint, Doc<'a>| -> bool) -> bool {
 234         let mut pos = d.start;
 235         while pos < d.end {
 236             let elt_tag = try_or!(vuint_at(d.data, pos), false);
 237             let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
 238             pos = elt_size.next + elt_size.val;
 239             let doc = Doc { data: d.data, start: elt_size.next, end: pos };
 240             if !it(elt_tag.val, doc) {
 241                 return false;
 242             }
 243         }
 244         return true;
 245     }
 246
 247     pub fn tagged_docs<'a>(d: Doc<'a>, tg: uint, it: |Doc<'a>| -> bool) -> bool {
 248         let mut pos = d.start;
 249         while pos < d.end {
 250             let elt_tag = try_or!(vuint_at(d.data, pos), false);
 251             let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
 252             pos = elt_size.next + elt_size.val;
 253             if elt_tag.val == tg {
 254                 let doc = Doc { data: d.data, start: elt_size.next,
 255                                 end: pos };
 256                 if !it(doc) {
 257                     return false;
 258                 }
 259             }
 260         }
 261         return true;
 262     }
 263
 264     pub fn with_doc_data<'a, T>(d: Doc<'a>, f: |x: &'a [u8]| -> T) -> T {
 265         f(d.data.slice(d.start, d.end))
 266     }
 267
 268
 269     pub fn doc_as_u8(d: Doc) -> u8 {
 270         assert_eq!(d.end, d.start + 1u);
 271         d.data[d.start]
 272     }
 273
 274     pub fn doc_as_u16(d: Doc) -> u16 {
 275         assert_eq!(d.end, d.start + 2u);
 276         u64_from_be_bytes(d.data, d.start, 2u) as u16
 277     }
 278
 279     pub fn doc_as_u32(d: Doc) -> u32 {
 280         assert_eq!(d.end, d.start + 4u);
 281         u64_from_be_bytes(d.data, d.start, 4u) as u32
 282     }
 283
 284     pub fn doc_as_u64(d: Doc) -> u64 {
 285         assert_eq!(d.end, d.start + 8u);
 286         u64_from_be_bytes(d.data, d.start, 8u)
 287     }
 288
 289     pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 }
 290     pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 }
 291     pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 }
 292     pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 }
 293
 294     pub struct Decoder<'a> {
             // Cursor over the children of one document.
             /// Document whose children are currently being read.
 295         parent: Doc<'a>,
             /// Offset in `parent.data` at which the next child is read.
 296         pos: uint,
 297     }
 298
 299     pub fn Decoder<'a>(d: Doc<'a>) -> Decoder<'a> {
 300         Decoder {
 301             parent: d,
 302             pos: d.start
 303         }
 304     }
 305
 306     impl<'doc> Decoder<'doc> {
 307         fn _check_label(&mut self, lbl: &str) -> DecodeResult<()> {
 308             if self.pos < self.parent.end {
 309                 let TaggedDoc { tag: r_tag, doc: r_doc } =
 310                     try!(doc_at(self.parent.data, self.pos));
 311
 312                 if r_tag == (EsLabel as uint) {
 313                     self.pos = r_doc.end;
 314                     let str = r_doc.as_str_slice();
 315                     if lbl != str {
 316                         return Err(Expected(format!("Expected label {} but \
 317                                                      found {}", lbl, str)));
 318                     }
 319                 }
 320             }
 321             Ok(())
 322         }
 323
 324         fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<Doc<'doc>> {
 325             debug!(". next_doc(exp_tag={})", exp_tag);
 326             if self.pos >= self.parent.end {
 327                 return Err(Expected(format!("no more documents in \
 328                                              current node!")));
 329             }
 330             let TaggedDoc { tag: r_tag, doc: r_doc } =
 331                 try!(doc_at(self.parent.data, self.pos));
 332             debug!("self.parent={}-{} self.pos={} r_tag={} r_doc={}-{}",
 333                    self.parent.start,
 334                    self.parent.end,
 335                    self.pos,
 336                    r_tag,
 337                    r_doc.start,
 338                    r_doc.end);
 339             if r_tag != (exp_tag as uint) {
 340                 return Err(Expected(format!("expected EBML doc with tag {} but \
 341                                              found tag {}", exp_tag, r_tag)));
 342             }
 343             if r_doc.end > self.parent.end {
 344                 return Err(Expected(format!("invalid EBML, child extends to \
 345                                              {:#x}, parent to {:#x}",
 346                                             r_doc.end, self.parent.end)));
 347             }
 348             self.pos = r_doc.end;
 349             Ok(r_doc)
 350         }
 351
 352         fn push_doc<T>(&mut self, exp_tag: EbmlEncoderTag,
 353                        f: |&mut Decoder<'doc>| -> DecodeResult<T>) -> DecodeResult<T> {
 354             let d = try!(self.next_doc(exp_tag));
 355             let old_parent = self.parent;
 356             let old_pos = self.pos;
 357             self.parent = d;
 358             self.pos = d.start;
 359             let r = try!(f(self));
 360             self.parent = old_parent;
 361             self.pos = old_pos;
 362             Ok(r)
 363         }
 364
 365         fn _next_uint(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<uint> {
 366             let r = doc_as_u32(try!(self.next_doc(exp_tag)));
 367             debug!("_next_uint exp_tag={} result={}", exp_tag, r);
 368             Ok(r as uint)
 369         }
 370
 371         pub fn read_opaque<R>(&mut self,
 372                               op: |&mut Decoder<'doc>, Doc| -> DecodeResult<R>) -> DecodeResult<R> {
 373             let doc = try!(self.next_doc(EsOpaque));
 374
 375             let (old_parent, old_pos) = (self.parent, self.pos);
 376             self.parent = doc;
 377             self.pos = doc.start;
 378
 379             let result = try!(op(self, doc));
 380
 381             self.parent = old_parent;
 382             self.pos = old_pos;
 383             Ok(result)
 384         }
 385     }
 386
 387     impl<'doc> serialize::Decoder<Error> for Decoder<'doc> {
 388         fn read_nil(&mut self) -> DecodeResult<()> { Ok(()) }
 389
 390         fn read_u64(&mut self) -> DecodeResult<u64> { Ok(doc_as_u64(try!(self.next_doc(EsU64)))) }
 391         fn read_u32(&mut self) -> DecodeResult<u32> { Ok(doc_as_u32(try!(self.next_doc(EsU32)))) }
 392         fn read_u16(&mut self) -> DecodeResult<u16> { Ok(doc_as_u16(try!(self.next_doc(EsU16)))) }
 393         fn read_u8 (&mut self) -> DecodeResult<u8 > { Ok(doc_as_u8 (try!(self.next_doc(EsU8 )))) }
 394         fn read_uint(&mut self) -> DecodeResult<uint> {
 395             let v = doc_as_u64(try!(self.next_doc(EsUint)));
 396             if v > (::std::uint::MAX as u64) {
 397                 Err(IntTooBig(v as uint))
 398             } else {
 399                 Ok(v as uint)
 400             }
 401         }
 402
 403         fn read_i64(&mut self) -> DecodeResult<i64> {
 404             Ok(doc_as_u64(try!(self.next_doc(EsI64))) as i64)
 405         }
 406         fn read_i32(&mut self) -> DecodeResult<i32> {
 407             Ok(doc_as_u32(try!(self.next_doc(EsI32))) as i32)
 408         }
 409         fn read_i16(&mut self) -> DecodeResult<i16> {
 410             Ok(doc_as_u16(try!(self.next_doc(EsI16))) as i16)
 411         }
 412         fn read_i8 (&mut self) -> DecodeResult<i8> {
 413             Ok(doc_as_u8(try!(self.next_doc(EsI8 ))) as i8)
 414         }
 415         fn read_int(&mut self) -> DecodeResult<int> {
 416             let v = doc_as_u64(try!(self.next_doc(EsInt))) as i64;
 417             if v > (int::MAX as i64) || v < (int::MIN as i64) {
 418                 debug!("FIXME \\#6122: Removing this makes this function miscompile");
 419                 Err(IntTooBig(v as uint))
 420             } else {
 421                 Ok(v as int)
 422             }
 423         }
 424
 425         fn read_bool(&mut self) -> DecodeResult<bool> {
 426             Ok(doc_as_u8(try!(self.next_doc(EsBool))) != 0)
 427         }
 428
 429         fn read_f64(&mut self) -> DecodeResult<f64> {
 430             let bits = doc_as_u64(try!(self.next_doc(EsF64)));
 431             Ok(unsafe { transmute(bits) })
 432         }
 433         fn read_f32(&mut self) -> DecodeResult<f32> {
 434             let bits = doc_as_u32(try!(self.next_doc(EsF32)));
 435             Ok(unsafe { transmute(bits) })
 436         }
 437         fn read_char(&mut self) -> DecodeResult<char> {
 438             Ok(char::from_u32(doc_as_u32(try!(self.next_doc(EsChar)))).unwrap())
 439         }
 440         fn read_str(&mut self) -> DecodeResult<String> {
 441             Ok(try!(self.next_doc(EsStr)).as_str())
 442         }
 443
 444         // Compound types:
 445         fn read_enum<T>(&mut self,
 446                         name: &str,
 447                         f: |&mut Decoder<'doc>| -> DecodeResult<T>) -> DecodeResult<T> {
 448             debug!("read_enum({})", name);
 449             try!(self._check_label(name));
 450
 451             let doc = try!(self.next_doc(EsEnum));
 452
 453             let (old_parent, old_pos) = (self.parent, self.pos);
 454             self.parent = doc;
 455             self.pos = self.parent.start;
 456
 457             let result = try!(f(self));
 458
 459             self.parent = old_parent;
 460             self.pos = old_pos;
 461             Ok(result)
 462         }
 463
 464         fn read_enum_variant<T>(&mut self,
 465                                 _: &[&str],
 466                                 f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>)
 467                                 -> DecodeResult<T> {
 468             debug!("read_enum_variant()");
 469             let idx = try!(self._next_uint(EsEnumVid));
 470             debug!("  idx={}", idx);
 471
 472             let doc = try!(self.next_doc(EsEnumBody));
 473
 474             let (old_parent, old_pos) = (self.parent, self.pos);
 475             self.parent = doc;
 476             self.pos = self.parent.start;
 477
 478             let result = try!(f(self, idx));
 479
 480             self.parent = old_parent;
 481             self.pos = old_pos;
 482             Ok(result)
 483         }
 484
 485         fn read_enum_variant_arg<T>(&mut self,
 486                                     idx: uint,
 487                                     f: |&mut Decoder<'doc>| -> DecodeResult<T>) -> DecodeResult<T> {
 488             debug!("read_enum_variant_arg(idx={})", idx);
 489             f(self)
 490         }
 491
 492         fn read_enum_struct_variant<T>(&mut self,
 493                                        _: &[&str],
 494                                        f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>)
 495                                        -> DecodeResult<T> {
 496             debug!("read_enum_struct_variant()");
 497             let idx = try!(self._next_uint(EsEnumVid));
 498             debug!("  idx={}", idx);
 499
 500             let doc = try!(self.next_doc(EsEnumBody));
 501
 502             let (old_parent, old_pos) = (self.parent, self.pos);
 503             self.parent = doc;
 504             self.pos = self.parent.start;
 505
 506             let result = try!(f(self, idx));
 507
 508             self.parent = old_parent;
 509             self.pos = old_pos;
 510             Ok(result)
 511         }
 512
 513         fn read_enum_struct_variant_field<T>(&mut self,
 514                                              name: &str,
 515                                              idx: uint,
 516                                              f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 517                                              -> DecodeResult<T> {
 518             debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx);
 519             f(self)
 520         }
 521
 522         fn read_struct<T>(&mut self,
 523                           name: &str,
 524                           _: uint,
 525                           f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 526                           -> DecodeResult<T> {
 527             debug!("read_struct(name={})", name);
 528             f(self)
 529         }
 530
 531         fn read_struct_field<T>(&mut self,
 532                                 name: &str,
 533                                 idx: uint,
 534                                 f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 535                                 -> DecodeResult<T> {
 536             debug!("read_struct_field(name={}, idx={})", name, idx);
 537             try!(self._check_label(name));
 538             f(self)
 539         }
 540
 541         fn read_tuple<T>(&mut self,
 542                          f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>) -> DecodeResult<T> {
 543             debug!("read_tuple()");
 544             self.read_seq(f)
 545         }
 546
 547         fn read_tuple_arg<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 548                              -> DecodeResult<T> {
 549             debug!("read_tuple_arg(idx={})", idx);
 550             self.read_seq_elt(idx, f)
 551         }
 552
 553         fn read_tuple_struct<T>(&mut self,
 554                                 name: &str,
 555                                 f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>)
 556                                 -> DecodeResult<T> {
 557             debug!("read_tuple_struct(name={})", name);
 558             self.read_tuple(f)
 559         }
 560
 561         fn read_tuple_struct_arg<T>(&mut self,
 562                                     idx: uint,
 563                                     f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 564                                     -> DecodeResult<T> {
 565             debug!("read_tuple_struct_arg(idx={})", idx);
 566             self.read_tuple_arg(idx, f)
 567         }
 568
 569         fn read_option<T>(&mut self,
 570                           f: |&mut Decoder<'doc>, bool| -> DecodeResult<T>) -> DecodeResult<T> {
 571             debug!("read_option()");
 572             self.read_enum("Option", |this| {
 573                 this.read_enum_variant(["None", "Some"], |this, idx| {
 574                     match idx {
 575                         0 => f(this, false),
 576                         1 => f(this, true),
 577                         _ => {
 578                             Err(Expected(format!("Expected None or Some")))
 579                         }
 580                     }
 581                 })
 582             })
 583         }
 584
 585         fn read_seq<T>(&mut self,
 586                        f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>) -> DecodeResult<T> {
 587             debug!("read_seq()");
 588             self.push_doc(EsVec, |d| {
 589                 let len = try!(d._next_uint(EsVecLen));
 590                 debug!("  len={}", len);
 591                 f(d, len)
 592             })
 593         }
 594
 595         fn read_seq_elt<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 596                            -> DecodeResult<T> {
 597             debug!("read_seq_elt(idx={})", idx);
 598             self.push_doc(EsVecElt, f)
 599         }
 600
 601         fn read_map<T>(&mut self,
 602                        f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>) -> DecodeResult<T> {
 603             debug!("read_map()");
 604             self.push_doc(EsMap, |d| {
 605                 let len = try!(d._next_uint(EsMapLen));
 606                 debug!("  len={}", len);
 607                 f(d, len)
 608             })
 609         }
 610
 611         fn read_map_elt_key<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 612                                -> DecodeResult<T> {
 613             debug!("read_map_elt_key(idx={})", idx);
 614             self.push_doc(EsMapKey, f)
 615         }
 616
 617         fn read_map_elt_val<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 618                                -> DecodeResult<T> {
 619             debug!("read_map_elt_val(idx={})", idx);
 620             self.push_doc(EsMapVal, f)
 621         }
 622     }
 623 }
 624
 625 pub mod writer {
 626     use std::clone::Clone;
 627     use std::io::extensions::u64_to_be_bytes;
 628     use std::io::{Writer, Seek};
 629     use std::io;
 630     use std::mem;
 631
 632     use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey,
 633         EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8,
 634         EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint,
 635         EsOpaque, EsLabel, EbmlEncoderTag };
 636
 637     use serialize;
 638
 639
 640     pub type EncodeResult = io::IoResult<()>; // every writer operation yields () or an I/O error
 641
 642     // ebml writing
 643     pub struct Encoder<'a, W> {
             /// Destination the encoded bytes are written to.
 644         pub writer: &'a mut W,
             /// Stack of positions (as reported by `writer.tell()`) of the size
             /// placeholders written by `start_tag` and not yet filled in by `end_tag`.
 645         size_positions: Vec<uint>,
 646     }
 647
 648     fn write_sized_vuint<W: Writer>(w: &mut W, n: uint, size: uint) -> EncodeResult {
 649         match size {
 650             1u => w.write(&[0x80u8 | (n as u8)]),
 651             2u => w.write(&[0x40u8 | ((n >> 8_u) as u8), n as u8]),
 652             3u => w.write(&[0x20u8 | ((n >> 16_u) as u8), (n >> 8_u) as u8,
 653                             n as u8]),
 654             4u => w.write(&[0x10u8 | ((n >> 24_u) as u8), (n >> 16_u) as u8,
 655                             (n >> 8_u) as u8, n as u8]),
 656             _ => Err(io::IoError {
 657                 kind: io::OtherIoError,
 658                 desc: "int too big",
 659                 detail: Some(format!("{}", n))
 660             })
 661         }
 662     }
 663
 664     fn write_vuint<W: Writer>(w: &mut W, n: uint) -> EncodeResult {
 665         if n < 0x7f_u { return write_sized_vuint(w, n, 1u); }
 666         if n < 0x4000_u { return write_sized_vuint(w, n, 2u); }
 667         if n < 0x200000_u { return write_sized_vuint(w, n, 3u); }
 668         if n < 0x10000000_u { return write_sized_vuint(w, n, 4u); }
 669         Err(io::IoError {
 670             kind: io::OtherIoError,
 671             desc: "int too big",
 672             detail: Some(format!("{}", n))
 673         })
 674     }
 675
 676     pub fn Encoder<'a, W: Writer + Seek>(w: &'a mut W) -> Encoder<'a, W> {
 677         Encoder {
 678             writer: w,
 679             size_positions: vec!(),
 680         }
 681     }
 682
 683     // FIXME (#2741): Provide a function to write the standard ebml header.
 684     impl<'a, W: Writer + Seek> Encoder<'a, W> {
 685         /// FIXME(pcwalton): Workaround for badness in trans. DO NOT USE ME.
 686         pub unsafe fn unsafe_clone(&self) -> Encoder<'a, W> {
 687             Encoder {
 688                 writer: mem::transmute_copy(&self.writer),
 689                 size_positions: self.size_positions.clone(),
 690             }
 691         }
 692
 693         pub fn start_tag(&mut self, tag_id: uint) -> EncodeResult {
 694             debug!("Start tag {}", tag_id);
 695
 696             // Write the enum ID:
 697             try!(write_vuint(self.writer, tag_id));
 698
 699             // Write a placeholder four-byte size.
 700             self.size_positions.push(try!(self.writer.tell()) as uint);
 701             let zeroes: &[u8] = &[0u8, 0u8, 0u8, 0u8];
 702             self.writer.write(zeroes)
 703         }
 704
 705         pub fn end_tag(&mut self) -> EncodeResult {
 706             let last_size_pos = self.size_positions.pop().unwrap();
 707             let cur_pos = try!(self.writer.tell());
 708             try!(self.writer.seek(last_size_pos as i64, io::SeekSet));
 709             let size = cur_pos as uint - last_size_pos - 4;
 710             try!(write_sized_vuint(self.writer, size, 4u));
 711             let r = try!(self.writer.seek(cur_pos as i64, io::SeekSet));
 712
 713             debug!("End tag (size = {})", size);
 714             Ok(r)
 715         }
 716
 717         pub fn wr_tag(&mut self, tag_id: uint, blk: || -> EncodeResult) -> EncodeResult {
 718             try!(self.start_tag(tag_id));
 719             try!(blk());
 720             self.end_tag()
 721         }
 722
 723         pub fn wr_tagged_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult {
 724             try!(write_vuint(self.writer, tag_id));
 725             try!(write_vuint(self.writer, b.len()));
 726             self.writer.write(b)
 727         }
 728
 729         pub fn wr_tagged_u64(&mut self, tag_id: uint, v: u64) -> EncodeResult {
 730             u64_to_be_bytes(v, 8u, |v| {
 731                 self.wr_tagged_bytes(tag_id, v)
 732             })
 733         }
 734
 735         pub fn wr_tagged_u32(&mut self, tag_id: uint, v: u32)  -> EncodeResult{
 736             u64_to_be_bytes(v as u64, 4u, |v| {
 737                 self.wr_tagged_bytes(tag_id, v)
 738             })
 739         }
 740
 741         pub fn wr_tagged_u16(&mut self, tag_id: uint, v: u16) -> EncodeResult {
 742             u64_to_be_bytes(v as u64, 2u, |v| {
 743                 self.wr_tagged_bytes(tag_id, v)
 744             })
 745         }
 746
 747         pub fn wr_tagged_u8(&mut self, tag_id: uint, v: u8) -> EncodeResult {
 748             self.wr_tagged_bytes(tag_id, &[v])
 749         }
 750
 751         pub fn wr_tagged_i64(&mut self, tag_id: uint, v: i64) -> EncodeResult {
 752             u64_to_be_bytes(v as u64, 8u, |v| {
 753                 self.wr_tagged_bytes(tag_id, v)
 754             })
 755         }
 756
 757         pub fn wr_tagged_i32(&mut self, tag_id: uint, v: i32) -> EncodeResult {
 758             u64_to_be_bytes(v as u64, 4u, |v| {
 759                 self.wr_tagged_bytes(tag_id, v)
 760             })
 761         }
 762
 763         pub fn wr_tagged_i16(&mut self, tag_id: uint, v: i16) -> EncodeResult {
 764             u64_to_be_bytes(v as u64, 2u, |v| {
 765                 self.wr_tagged_bytes(tag_id, v)
 766             })
 767         }
 768
 769         pub fn wr_tagged_i8(&mut self, tag_id: uint, v: i8) -> EncodeResult {
 770             self.wr_tagged_bytes(tag_id, &[v as u8])
 771         }
 772
 773         pub fn wr_tagged_str(&mut self, tag_id: uint, v: &str) -> EncodeResult {
 774             self.wr_tagged_bytes(tag_id, v.as_bytes())
 775         }
 776
 777         pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult {
 778             debug!("Write {} bytes", b.len());
 779             self.writer.write(b)
 780         }
 781
 782         pub fn wr_str(&mut self, s: &str) -> EncodeResult {
 783             debug!("Write str: {}", s);
 784             self.writer.write(s.as_bytes())
 785         }
 786     }
 787
 788     // FIXME (#2743): optionally perform "relaxations" on end_tag to more
 789     // efficiently encode sizes; this is a fixed point iteration
 790
 791     // When true, `_emit_label` writes its label as an `EsLabel` element; when
 792     // false it writes nothing. The flag is a constant, so changing it means
         // editing this line.
 793     static DEBUG: bool = true;
 794
 795     impl<'a, W: Writer + Seek> Encoder<'a, W> {
 796         // used internally to emit things like the vector length and so on
 797         fn _emit_tagged_uint(&mut self, t: EbmlEncoderTag, v: uint) -> EncodeResult {
 798             assert!(v <= 0xFFFF_FFFF_u);
 799             self.wr_tagged_u32(t as uint, v as u32)
 800         }
 801
 802         fn _emit_label(&mut self, label: &str) -> EncodeResult {
 803             // There are various strings that we have access to, such as
 804             // the name of a record field, which do not actually appear in
 805             // the encoded EBML (normally).  This is just for
 806             // efficiency.  When debugging, though, we can emit such
 807             // labels and then they will be checked by decoder to
 808             // try and check failures more quickly.
 809             if DEBUG { self.wr_tagged_str(EsLabel as uint, label) }
 810             else { Ok(()) }
 811         }
 812
 813         pub fn emit_opaque(&mut self, f: |&mut Encoder<W>| -> EncodeResult) -> EncodeResult {
 814             try!(self.start_tag(EsOpaque as uint));
 815             try!(f(self));
 816             self.end_tag()
 817         }
 818     }
 819
 820     impl<'a, W: Writer + Seek> serialize::Encoder<io::IoError> for Encoder<'a, W> {
 821         fn emit_nil(&mut self) -> EncodeResult {
 822             Ok(())
 823         }
 824
 825         fn emit_uint(&mut self, v: uint) -> EncodeResult {
 826             self.wr_tagged_u64(EsUint as uint, v as u64)
 827         }
 828         fn emit_u64(&mut self, v: u64) -> EncodeResult {
 829             self.wr_tagged_u64(EsU64 as uint, v)
 830         }
 831         fn emit_u32(&mut self, v: u32) -> EncodeResult {
 832             self.wr_tagged_u32(EsU32 as uint, v)
 833         }
 834         fn emit_u16(&mut self, v: u16) -> EncodeResult {
 835             self.wr_tagged_u16(EsU16 as uint, v)
 836         }
 837         fn emit_u8(&mut self, v: u8) -> EncodeResult {
 838             self.wr_tagged_u8(EsU8 as uint, v)
 839         }
 840
 841         fn emit_int(&mut self, v: int) -> EncodeResult {
 842             self.wr_tagged_i64(EsInt as uint, v as i64)
 843         }
 844         fn emit_i64(&mut self, v: i64) -> EncodeResult {
 845             self.wr_tagged_i64(EsI64 as uint, v)
 846         }
 847         fn emit_i32(&mut self, v: i32) -> EncodeResult {
 848             self.wr_tagged_i32(EsI32 as uint, v)
 849         }
 850         fn emit_i16(&mut self, v: i16) -> EncodeResult {
 851             self.wr_tagged_i16(EsI16 as uint, v)
 852         }
 853         fn emit_i8(&mut self, v: i8) -> EncodeResult {
 854             self.wr_tagged_i8(EsI8 as uint, v)
 855         }
 856
 857         fn emit_bool(&mut self, v: bool) -> EncodeResult {
 858             self.wr_tagged_u8(EsBool as uint, v as u8)
 859         }
 860
 861         fn emit_f64(&mut self, v: f64) -> EncodeResult {
 862             let bits = unsafe { mem::transmute(v) };
 863             self.wr_tagged_u64(EsF64 as uint, bits)
 864         }
 865         fn emit_f32(&mut self, v: f32) -> EncodeResult {
 866             let bits = unsafe { mem::transmute(v) };
 867             self.wr_tagged_u32(EsF32 as uint, bits)
 868         }
 869         fn emit_char(&mut self, v: char) -> EncodeResult {
 870             self.wr_tagged_u32(EsChar as uint, v as u32)
 871         }
 872
 873         fn emit_str(&mut self, v: &str) -> EncodeResult {
 874             self.wr_tagged_str(EsStr as uint, v)
 875         }
 876
 877         fn emit_enum(&mut self,
 878                      name: &str,
 879                      f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 880             try!(self._emit_label(name));
 881             try!(self.start_tag(EsEnum as uint));
 882             try!(f(self));
 883             self.end_tag()
 884         }
 885
 886         fn emit_enum_variant(&mut self,
 887                              _: &str,
 888                              v_id: uint,
 889                              _: uint,
 890                              f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 891             try!(self._emit_tagged_uint(EsEnumVid, v_id));
 892             try!(self.start_tag(EsEnumBody as uint));
 893             try!(f(self));
 894             self.end_tag()
 895         }
 896
 897         fn emit_enum_variant_arg(&mut self,
 898                                  _: uint,
 899                                  f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 900             f(self)
 901         }
 902
 903         fn emit_enum_struct_variant(&mut self,
 904                                     v_name: &str,
 905                                     v_id: uint,
 906                                     cnt: uint,
 907                                     f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 908             self.emit_enum_variant(v_name, v_id, cnt, f)
 909         }
 910
 911         fn emit_enum_struct_variant_field(&mut self,
 912                                           _: &str,
 913                                           idx: uint,
 914                                           f: |&mut Encoder<'a, W>| -> EncodeResult)
 915             -> EncodeResult {
 916             self.emit_enum_variant_arg(idx, f)
 917         }
 918
 919         fn emit_struct(&mut self,
 920                        _: &str,
 921                        _len: uint,
 922                        f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 923             f(self)
 924         }
 925
 926         fn emit_struct_field(&mut self,
 927                              name: &str,
 928                              _: uint,
 929                              f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 930             try!(self._emit_label(name));
 931             f(self)
 932         }
 933
 934         fn emit_tuple(&mut self,
 935                       len: uint,
 936                       f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 937             self.emit_seq(len, f)
 938         }
 939         fn emit_tuple_arg(&mut self,
 940                           idx: uint,
 941                           f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 942             self.emit_seq_elt(idx, f)
 943         }
 944
 945         fn emit_tuple_struct(&mut self,
 946                              _: &str,
 947                              len: uint,
 948                              f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 949             self.emit_seq(len, f)
 950         }
 951         fn emit_tuple_struct_arg(&mut self,
 952                                  idx: uint,
 953                                  f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 954             self.emit_seq_elt(idx, f)
 955         }
 956
 957         fn emit_option(&mut self,
 958                        f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 959             self.emit_enum("Option", f)
 960         }
 961         fn emit_option_none(&mut self) -> EncodeResult {
 962             self.emit_enum_variant("None", 0, 0, |_| Ok(()))
 963         }
 964         fn emit_option_some(&mut self,
 965                             f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 966
 967             self.emit_enum_variant("Some", 1, 1, f)
 968         }
 969
 970         fn emit_seq(&mut self,
 971                     len: uint,
 972                     f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 973
 974             try!(self.start_tag(EsVec as uint));
 975             try!(self._emit_tagged_uint(EsVecLen, len));
 976             try!(f(self));
 977             self.end_tag()
 978         }
 979
 980         fn emit_seq_elt(&mut self,
 981                         _idx: uint,
 982                         f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 983
 984             try!(self.start_tag(EsVecElt as uint));
 985             try!(f(self));
 986             self.end_tag()
 987         }
 988
 989         fn emit_map(&mut self,
 990                     len: uint,
 991                     f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 992
 993             try!(self.start_tag(EsMap as uint));
 994             try!(self._emit_tagged_uint(EsMapLen, len));
 995             try!(f(self));
 996             self.end_tag()
 997         }
 998
 999         fn emit_map_elt_key(&mut self,
1000                             _idx: uint,
1001                             f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
1002
1003             try!(self.start_tag(EsMapKey as uint));
1004             try!(f(self));
1005             self.end_tag()
1006         }
1007
1008         fn emit_map_elt_val(&mut self,
1009                             _idx: uint,
1010                             f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
1011             try!(self.start_tag(EsMapVal as uint));
1012             try!(f(self));
1013             self.end_tag()
1014         }
1015     }
1016 }
1017
1018 // ___________________________________________________________________________
1019 // Testing
1020
#[cfg(test)]
mod tests {
    use ebml::reader;
    use ebml::writer;
    use {Encodable, Decodable};

    use std::io::MemWriter;
    use std::option::{None, Option, Some};

    // Decodes eight consecutive variable-length uints (two each of one,
    // two, three and four bytes: the smallest and the largest value of
    // each width) and checks both the decoded value and the offset at
    // which the next one starts.
    #[test]
    fn test_vuint_at() {
        let data = [
            0x80,
            0xff,
            0x40, 0x00,
            0x7f, 0xff,
            0x20, 0x00, 0x00,
            0x3f, 0xff, 0xff,
            0x10, 0x00, 0x00, 0x00,
            0x1f, 0xff, 0xff, 0xff
        ];

        let mut res: reader::Res;

        // Class A (one byte, 7 value bits)
        res = reader::vuint_at(data, 0).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 1);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 7) - 1);
        assert_eq!(res.next, 2);

        // Class B (two bytes, 14 value bits)
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 4);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 14) - 1);
        assert_eq!(res.next, 6);

        // Class C (three bytes, 21 value bits)
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 9);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 21) - 1);
        assert_eq!(res.next, 12);

        // Class D (four bytes, 28 value bits)
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, 0);
        assert_eq!(res.next, 16);
        res = reader::vuint_at(data, res.next).unwrap();
        assert_eq!(res.val, (1 << 28) - 1);
        assert_eq!(res.next, 20);
    }

    // Round-trips `Option<int>` values through the EBML encoder and
    // decoder and checks that they come back unchanged.
    #[test]
    fn test_option_int() {
        fn test_v(v: Option<int>) {
            debug!("v == {}", v);
            let mut wr = MemWriter::new();
            {
                let mut ebml_w = writer::Encoder(&mut wr);
                // Fail right here if encoding goes wrong, instead of
                // letting it show up later as a decode error or a
                // mismatched value.
                v.encode(&mut ebml_w).unwrap();
            }
            let ebml_doc = reader::Doc(wr.get_ref());
            let mut deser = reader::Decoder(ebml_doc);
            let v1 = Decodable::decode(&mut deser).unwrap();
            debug!("v1 == {}", v1);
            assert_eq!(v, v1);
        }

        test_v(Some(22));
        test_v(None);
        test_v(Some(3));
    }
}
1099
#[cfg(test)]
mod bench {
    extern crate test;
    use self::test::Bencher;
    use ebml::reader;

    // Every benchmark below decodes one vuint at every fourth offset of a
    // generated buffer and adds the decoded values into a running total.

    // Buffer of 400 bytes with 0x80 at every even index; decoding starts at
    // offset 0, so every decoded vuint begins with 0x80.
    #[bench]
    pub fn vuint_at_A_aligned(b: &mut Bencher) {
        let bytes = Vec::from_fn(4*100, |idx| {
            if idx % 2 == 0 { 0x80u8 } else { idx as u8 }
        });
        let mut total = 0u;
        b.iter(|| {
            let mut pos = 0;
            while pos < bytes.len() {
                let decoded = reader::vuint_at(bytes.as_slice(), pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }

    // Buffer of 401 bytes with 0x80 at every odd index; decoding starts at
    // offset 1, so every decoded vuint begins with 0x80.
    #[bench]
    pub fn vuint_at_A_unaligned(b: &mut Bencher) {
        let bytes = Vec::from_fn(4*100+1, |idx| {
            if idx % 2 == 1 { 0x80u8 } else { idx as u8 }
        });
        let mut total = 0u;
        b.iter(|| {
            let mut pos = 1;
            while pos < bytes.len() {
                let decoded = reader::vuint_at(bytes.as_slice(), pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }

    // Buffer of 400 bytes made of four-byte groups `0x10, 0, 0, idx as u8`;
    // decoding starts at offset 0, so every decoded vuint begins with 0x10.
    #[bench]
    pub fn vuint_at_D_aligned(b: &mut Bencher) {
        let bytes = Vec::from_fn(4*100, |idx| {
            match idx % 4 {
              3 => idx as u8,
              0 => 0x10u8,
              _ => 0u8
            }
        });
        let mut total = 0u;
        b.iter(|| {
            let mut pos = 0;
            while pos < bytes.len() {
                let decoded = reader::vuint_at(bytes.as_slice(), pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }

    // Buffer of 401 bytes holding the same four-byte groups shifted by one
    // byte; decoding starts at offset 1, so every decoded vuint begins with
    // 0x10.
    #[bench]
    pub fn vuint_at_D_unaligned(b: &mut Bencher) {
        let bytes = Vec::from_fn(4*100+1, |idx| {
            match idx % 4 {
              0 => idx as u8,
              1 => 0x10u8,
              _ => 0u8
            }
        });
        let mut total = 0u;
        b.iter(|| {
            let mut pos = 1;
            while pos < bytes.len() {
                let decoded = reader::vuint_at(bytes.as_slice(), pos).unwrap();
                total += decoded.val;
                pos += 4;
            }
        });
    }
}