src/libserialize/ebml.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 #![allow(missing_doc)]
  12
  13 use std::io;
  14 use std::str;
  15
  16 // Simple Extensible Binary Markup Language (ebml) reader and writer on a
  17 // cursor model. See the specification here:
  18 //     http://www.matroska.org/technical/specs/rfc/index.html
  19
  20 // Common data structures
/// A view onto a region of an EBML byte buffer: the buffer itself plus the
/// byte range `start..end` (end exclusive) that this document covers.
#[deriving(Clone)]
pub struct Doc<'a> {
    /// The buffer this document points into.
    data: &'a [u8],
    /// Offset into `data` of the first byte of the document.
    start: uint,
    /// Offset into `data` one past the last byte of the document.
    end: uint,
}
  27
  28 impl<'doc> Doc<'doc> {
  29     pub fn get<'a>(&'a self, tag: uint) -> Doc<'a> {
  30         reader::get_doc(*self, tag)
  31     }
  32
  33     pub fn as_str_slice<'a>(&'a self) -> &'a str {
  34         str::from_utf8(self.data.slice(self.start, self.end)).unwrap()
  35     }
  36
  37     pub fn as_str(&self) -> ~str {
  38         self.as_str_slice().to_owned()
  39     }
  40 }
  41
/// A document together with the tag that preceded it in the buffer, as
/// produced by `reader::doc_at`.
pub struct TaggedDoc<'a> {
    /// The decoded tag value of the element.
    priv tag: uint,
    /// The element's payload (the bytes after the tag and size fields).
    doc: Doc<'a>,
}
  46
/// Tags that the encoder writes in front of each serialized value and that
/// the decoder expects to read back. The tags are used as `uint`s (via
/// `as uint`), so the declaration order below fixes their numeric values;
/// do not reorder the variants.
pub enum EbmlEncoderTag {
    EsUint,     // 0
    EsU64,      // 1
    EsU32,      // 2
    EsU16,      // 3
    EsU8,       // 4
    EsInt,      // 5
    EsI64,      // 6
    EsI32,      // 7
    EsI16,      // 8
    EsI8,       // 9
    EsBool,     // 10
    EsChar,     // 11
    EsStr,      // 12
    EsF64,      // 13
    EsF32,      // 14
    EsFloat,    // 15
    EsEnum,     // 16
    EsEnumVid,  // 17
    EsEnumBody, // 18
    EsVec,      // 19
    EsVecLen,   // 20
    EsVecElt,   // 21
    EsMap,      // 22
    EsMapLen,   // 23
    EsMapKey,   // 24
    EsMapVal,   // 25

    EsOpaque,   // 26

    EsLabel, // 27 -- Used only when debugging
}
  79
/// Errors reported by the EBML reader.
#[deriving(Show)]
pub enum Error {
    /// A variable-length integer had no recognised length marker, or a
    /// decoded integer did not fit the requested type. Carries the offending
    /// byte or value, cast to `uint`.
    IntTooBig(uint),
    /// The input did not have the structure the decoder expected; carries a
    /// human-readable description.
    Expected(~str),
    /// Wraps an I/O error.
    /// NOTE(review): not constructed in the visible part of this file --
    /// confirm where it is produced.
    IoError(io::IoError)
}
  86 // --------------------------------------
  87
  88 pub mod reader {
  89     use std::char;
  90
  91     use std::cast::transmute;
  92     use std::int;
  93     use std::option::{None, Option, Some};
  94     use std::io::extensions::u64_from_be_bytes;
  95
  96     use serialize;
  97
  98     use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey,
  99         EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8,
 100         EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint,
 101         EsOpaque, EsLabel, EbmlEncoderTag, Doc, TaggedDoc, Error, IntTooBig,
 102         Expected };
 103
    /// Result of a decoding step: the decoded value, or the `Error` that
    /// stopped decoding.
    pub type DecodeResult<T> = Result<T, Error>;
 105     // ebml reading
 106
 107     macro_rules! try_or(
 108         ($e:expr, $r:expr) => (
 109             match $e {
 110                 Ok(e) => e,
 111                 Err(e) => {
 112                     debug!("ignored error: {}", e);
 113                     return $r
 114                 }
 115             }
 116         )
 117     )
 118
    /// Outcome of decoding one variable-length unsigned integer.
    pub struct Res {
        /// The decoded value, with the length-marker bits masked off.
        val: uint,
        /// Offset of the first byte after the encoded integer.
        next: uint
    }
 123
 124     #[inline(never)]
 125     fn vuint_at_slow(data: &[u8], start: uint) -> DecodeResult<Res> {
 126         let a = data[start];
 127         if a & 0x80u8 != 0u8 {
 128             return Ok(Res {val: (a & 0x7fu8) as uint, next: start + 1u});
 129         }
 130         if a & 0x40u8 != 0u8 {
 131             return Ok(Res {val: ((a & 0x3fu8) as uint) << 8u |
 132                         (data[start + 1u] as uint),
 133                     next: start + 2u});
 134         }
 135         if a & 0x20u8 != 0u8 {
 136             return Ok(Res {val: ((a & 0x1fu8) as uint) << 16u |
 137                         (data[start + 1u] as uint) << 8u |
 138                         (data[start + 2u] as uint),
 139                     next: start + 3u});
 140         }
 141         if a & 0x10u8 != 0u8 {
 142             return Ok(Res {val: ((a & 0x0fu8) as uint) << 24u |
 143                         (data[start + 1u] as uint) << 16u |
 144                         (data[start + 2u] as uint) << 8u |
 145                         (data[start + 3u] as uint),
 146                     next: start + 4u});
 147         }
 148         Err(IntTooBig(a as uint))
 149     }
 150
 151     pub fn vuint_at(data: &[u8], start: uint) -> DecodeResult<Res> {
 152         use std::mem::from_be32;
 153
 154         if data.len() - start < 4 {
 155             return vuint_at_slow(data, start);
 156         }
 157
 158         // Lookup table for parsing EBML Element IDs as per http://ebml.sourceforge.net/specs/
 159         // The Element IDs are parsed by reading a big endian u32 positioned at data[start].
 160         // Using the four most significant bits of the u32 we lookup in the table below how the
 161         // element ID should be derived from it.
 162         //
 163         // The table stores tuples (shift, mask) where shift is the number the u32 should be right
 164         // shifted with and mask is the value the right shifted value should be masked with.
 165         // If for example the most significant bit is set this means it's a class A ID and the u32
 166         // should be right shifted with 24 and masked with 0x7f. Therefore we store (24, 0x7f) at
 167         // index 0x8 - 0xF (four bit numbers where the most significant bit is set).
 168         //
 169         // By storing the number of shifts and masks in a table instead of checking in order if
 170         // the most significant bit is set, the second most significant bit is set etc. we can
 171         // replace up to three "and+branch" with a single table lookup which gives us a measured
 172         // speedup of around 2x on x86_64.
 173         static SHIFT_MASK_TABLE: [(u32, u32), ..16] = [
 174             (0, 0x0), (0, 0x0fffffff),
 175             (8, 0x1fffff), (8, 0x1fffff),
 176             (16, 0x3fff), (16, 0x3fff), (16, 0x3fff), (16, 0x3fff),
 177             (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f),
 178             (24, 0x7f), (24, 0x7f), (24, 0x7f), (24, 0x7f)
 179         ];
 180
 181         unsafe {
 182             let ptr = data.as_ptr().offset(start as int) as *i32;
 183             let val = from_be32(*ptr) as u32;
 184
 185             let i = (val >> 28u) as uint;
 186             let (shift, mask) = SHIFT_MASK_TABLE[i];
 187             Ok(Res {
 188                 val: ((val >> shift) & mask) as uint,
 189                 next: start + (((32 - shift) >> 3) as uint)
 190             })
 191         }
 192     }
 193
 194     pub fn Doc<'a>(data: &'a [u8]) -> Doc<'a> {
 195         Doc { data: data, start: 0u, end: data.len() }
 196     }
 197
 198     pub fn doc_at<'a>(data: &'a [u8], start: uint) -> DecodeResult<TaggedDoc<'a>> {
 199         let elt_tag = try!(vuint_at(data, start));
 200         let elt_size = try!(vuint_at(data, elt_tag.next));
 201         let end = elt_size.next + elt_size.val;
 202         Ok(TaggedDoc {
 203             tag: elt_tag.val,
 204             doc: Doc { data: data, start: elt_size.next, end: end }
 205         })
 206     }
 207
 208     pub fn maybe_get_doc<'a>(d: Doc<'a>, tg: uint) -> Option<Doc<'a>> {
 209         let mut pos = d.start;
 210         while pos < d.end {
 211             let elt_tag = try_or!(vuint_at(d.data, pos), None);
 212             let elt_size = try_or!(vuint_at(d.data, elt_tag.next), None);
 213             pos = elt_size.next + elt_size.val;
 214             if elt_tag.val == tg {
 215                 return Some(Doc { data: d.data, start: elt_size.next,
 216                                   end: pos });
 217             }
 218         }
 219         None
 220     }
 221
 222     pub fn get_doc<'a>(d: Doc<'a>, tg: uint) -> Doc<'a> {
 223         match maybe_get_doc(d, tg) {
 224             Some(d) => d,
 225             None => {
 226                 error!("failed to find block with tag {}", tg);
 227                 fail!();
 228             }
 229         }
 230     }
 231
 232     pub fn docs<'a>(d: Doc<'a>, it: |uint, Doc<'a>| -> bool) -> bool {
 233         let mut pos = d.start;
 234         while pos < d.end {
 235             let elt_tag = try_or!(vuint_at(d.data, pos), false);
 236             let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
 237             pos = elt_size.next + elt_size.val;
 238             let doc = Doc { data: d.data, start: elt_size.next, end: pos };
 239             if !it(elt_tag.val, doc) {
 240                 return false;
 241             }
 242         }
 243         return true;
 244     }
 245
 246     pub fn tagged_docs<'a>(d: Doc<'a>, tg: uint, it: |Doc<'a>| -> bool) -> bool {
 247         let mut pos = d.start;
 248         while pos < d.end {
 249             let elt_tag = try_or!(vuint_at(d.data, pos), false);
 250             let elt_size = try_or!(vuint_at(d.data, elt_tag.next), false);
 251             pos = elt_size.next + elt_size.val;
 252             if elt_tag.val == tg {
 253                 let doc = Doc { data: d.data, start: elt_size.next,
 254                                 end: pos };
 255                 if !it(doc) {
 256                     return false;
 257                 }
 258             }
 259         }
 260         return true;
 261     }
 262
 263     pub fn with_doc_data<'a, T>(d: Doc<'a>, f: |x: &'a [u8]| -> T) -> T {
 264         f(d.data.slice(d.start, d.end))
 265     }
 266
 267
 268     pub fn doc_as_u8(d: Doc) -> u8 {
 269         assert_eq!(d.end, d.start + 1u);
 270         d.data[d.start]
 271     }
 272
 273     pub fn doc_as_u16(d: Doc) -> u16 {
 274         assert_eq!(d.end, d.start + 2u);
 275         u64_from_be_bytes(d.data, d.start, 2u) as u16
 276     }
 277
 278     pub fn doc_as_u32(d: Doc) -> u32 {
 279         assert_eq!(d.end, d.start + 4u);
 280         u64_from_be_bytes(d.data, d.start, 4u) as u32
 281     }
 282
 283     pub fn doc_as_u64(d: Doc) -> u64 {
 284         assert_eq!(d.end, d.start + 8u);
 285         u64_from_be_bytes(d.data, d.start, 8u)
 286     }
 287
 288     pub fn doc_as_i8(d: Doc) -> i8 { doc_as_u8(d) as i8 }
 289     pub fn doc_as_i16(d: Doc) -> i16 { doc_as_u16(d) as i16 }
 290     pub fn doc_as_i32(d: Doc) -> i32 { doc_as_u32(d) as i32 }
 291     pub fn doc_as_i64(d: Doc) -> i64 { doc_as_u64(d) as i64 }
 292
    /// Cursor over the children of a document, used to deserialize values.
    pub struct Decoder<'a> {
        /// The document whose children are currently being read.
        priv parent: Doc<'a>,
        /// Offset into `parent.data` of the next child to read.
        priv pos: uint,
    }
 297
 298     pub fn Decoder<'a>(d: Doc<'a>) -> Decoder<'a> {
 299         Decoder {
 300             parent: d,
 301             pos: d.start
 302         }
 303     }
 304
 305     impl<'doc> Decoder<'doc> {
 306         fn _check_label(&mut self, lbl: &str) -> DecodeResult<()> {
 307             if self.pos < self.parent.end {
 308                 let TaggedDoc { tag: r_tag, doc: r_doc } =
 309                     try!(doc_at(self.parent.data, self.pos));
 310
 311                 if r_tag == (EsLabel as uint) {
 312                     self.pos = r_doc.end;
 313                     let str = r_doc.as_str_slice();
 314                     if lbl != str {
 315                         return Err(Expected(format!("Expected label {} but found {}", lbl, str)));
 316                     }
 317                 }
 318             }
 319             Ok(())
 320         }
 321
 322         fn next_doc(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<Doc<'doc>> {
 323             debug!(". next_doc(exp_tag={:?})", exp_tag);
 324             if self.pos >= self.parent.end {
 325                 return Err(Expected(format!("no more documents in current node!")));
 326             }
 327             let TaggedDoc { tag: r_tag, doc: r_doc } =
 328                 try!(doc_at(self.parent.data, self.pos));
 329             debug!("self.parent={}-{} self.pos={} r_tag={} r_doc={}-{}",
 330                    self.parent.start,
 331                    self.parent.end,
 332                    self.pos,
 333                    r_tag,
 334                    r_doc.start,
 335                    r_doc.end);
 336             if r_tag != (exp_tag as uint) {
 337                 return Err(Expected(format!("expected EBML doc with tag {:?} but found tag {:?}",
 338                        exp_tag, r_tag)));
 339             }
 340             if r_doc.end > self.parent.end {
 341                 return Err(Expected(format!("invalid EBML, child extends to {:#x}, parent to {:#x}",
 342                       r_doc.end, self.parent.end)));
 343             }
 344             self.pos = r_doc.end;
 345             Ok(r_doc)
 346         }
 347
 348         fn push_doc<T>(&mut self, exp_tag: EbmlEncoderTag,
 349                        f: |&mut Decoder<'doc>| -> DecodeResult<T>) -> DecodeResult<T> {
 350             let d = try!(self.next_doc(exp_tag));
 351             let old_parent = self.parent;
 352             let old_pos = self.pos;
 353             self.parent = d;
 354             self.pos = d.start;
 355             let r = try!(f(self));
 356             self.parent = old_parent;
 357             self.pos = old_pos;
 358             Ok(r)
 359         }
 360
 361         fn _next_uint(&mut self, exp_tag: EbmlEncoderTag) -> DecodeResult<uint> {
 362             let r = doc_as_u32(try!(self.next_doc(exp_tag)));
 363             debug!("_next_uint exp_tag={:?} result={}", exp_tag, r);
 364             Ok(r as uint)
 365         }
 366
 367         pub fn read_opaque<R>(&mut self,
 368                               op: |&mut Decoder<'doc>, Doc| -> DecodeResult<R>) -> DecodeResult<R> {
 369             let doc = try!(self.next_doc(EsOpaque));
 370
 371             let (old_parent, old_pos) = (self.parent, self.pos);
 372             self.parent = doc;
 373             self.pos = doc.start;
 374
 375             let result = try!(op(self, doc));
 376
 377             self.parent = old_parent;
 378             self.pos = old_pos;
 379             Ok(result)
 380         }
 381     }
 382
 383     impl<'doc> serialize::Decoder<Error> for Decoder<'doc> {
 384         fn read_nil(&mut self) -> DecodeResult<()> { Ok(()) }
 385
 386         fn read_u64(&mut self) -> DecodeResult<u64> { Ok(doc_as_u64(try!(self.next_doc(EsU64)))) }
 387         fn read_u32(&mut self) -> DecodeResult<u32> { Ok(doc_as_u32(try!(self.next_doc(EsU32)))) }
 388         fn read_u16(&mut self) -> DecodeResult<u16> { Ok(doc_as_u16(try!(self.next_doc(EsU16)))) }
 389         fn read_u8 (&mut self) -> DecodeResult<u8 > { Ok(doc_as_u8 (try!(self.next_doc(EsU8 )))) }
 390         fn read_uint(&mut self) -> DecodeResult<uint> {
 391             let v = doc_as_u64(try!(self.next_doc(EsUint)));
 392             if v > (::std::uint::MAX as u64) {
 393                 Err(IntTooBig(v as uint))
 394             } else {
 395                 Ok(v as uint)
 396             }
 397         }
 398
 399         fn read_i64(&mut self) -> DecodeResult<i64> {
 400             Ok(doc_as_u64(try!(self.next_doc(EsI64))) as i64)
 401         }
 402         fn read_i32(&mut self) -> DecodeResult<i32> {
 403             Ok(doc_as_u32(try!(self.next_doc(EsI32))) as i32)
 404         }
 405         fn read_i16(&mut self) -> DecodeResult<i16> {
 406             Ok(doc_as_u16(try!(self.next_doc(EsI16))) as i16)
 407         }
 408         fn read_i8 (&mut self) -> DecodeResult<i8> {
 409             Ok(doc_as_u8(try!(self.next_doc(EsI8 ))) as i8)
 410         }
 411         fn read_int(&mut self) -> DecodeResult<int> {
 412             let v = doc_as_u64(try!(self.next_doc(EsInt))) as i64;
 413             if v > (int::MAX as i64) || v < (int::MIN as i64) {
 414                 debug!("FIXME \\#6122: Removing this makes this function miscompile");
 415                 Err(IntTooBig(v as uint))
 416             } else {
 417                 Ok(v as int)
 418             }
 419         }
 420
 421         fn read_bool(&mut self) -> DecodeResult<bool> {
 422             Ok(doc_as_u8(try!(self.next_doc(EsBool))) != 0)
 423         }
 424
 425         fn read_f64(&mut self) -> DecodeResult<f64> {
 426             let bits = doc_as_u64(try!(self.next_doc(EsF64)));
 427             Ok(unsafe { transmute(bits) })
 428         }
 429         fn read_f32(&mut self) -> DecodeResult<f32> {
 430             let bits = doc_as_u32(try!(self.next_doc(EsF32)));
 431             Ok(unsafe { transmute(bits) })
 432         }
 433         fn read_char(&mut self) -> DecodeResult<char> {
 434             Ok(char::from_u32(doc_as_u32(try!(self.next_doc(EsChar)))).unwrap())
 435         }
 436         fn read_str(&mut self) -> DecodeResult<~str> {
 437             Ok(try!(self.next_doc(EsStr)).as_str())
 438         }
 439
 440         // Compound types:
 441         fn read_enum<T>(&mut self,
 442                         name: &str,
 443                         f: |&mut Decoder<'doc>| -> DecodeResult<T>) -> DecodeResult<T> {
 444             debug!("read_enum({})", name);
 445             try!(self._check_label(name));
 446
 447             let doc = try!(self.next_doc(EsEnum));
 448
 449             let (old_parent, old_pos) = (self.parent, self.pos);
 450             self.parent = doc;
 451             self.pos = self.parent.start;
 452
 453             let result = try!(f(self));
 454
 455             self.parent = old_parent;
 456             self.pos = old_pos;
 457             Ok(result)
 458         }
 459
 460         fn read_enum_variant<T>(&mut self,
 461                                 _: &[&str],
 462                                 f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>)
 463                                 -> DecodeResult<T> {
 464             debug!("read_enum_variant()");
 465             let idx = try!(self._next_uint(EsEnumVid));
 466             debug!("  idx={}", idx);
 467
 468             let doc = try!(self.next_doc(EsEnumBody));
 469
 470             let (old_parent, old_pos) = (self.parent, self.pos);
 471             self.parent = doc;
 472             self.pos = self.parent.start;
 473
 474             let result = try!(f(self, idx));
 475
 476             self.parent = old_parent;
 477             self.pos = old_pos;
 478             Ok(result)
 479         }
 480
 481         fn read_enum_variant_arg<T>(&mut self,
 482                                     idx: uint,
 483                                     f: |&mut Decoder<'doc>| -> DecodeResult<T>) -> DecodeResult<T> {
 484             debug!("read_enum_variant_arg(idx={})", idx);
 485             f(self)
 486         }
 487
 488         fn read_enum_struct_variant<T>(&mut self,
 489                                        _: &[&str],
 490                                        f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>)
 491                                        -> DecodeResult<T> {
 492             debug!("read_enum_struct_variant()");
 493             let idx = try!(self._next_uint(EsEnumVid));
 494             debug!("  idx={}", idx);
 495
 496             let doc = try!(self.next_doc(EsEnumBody));
 497
 498             let (old_parent, old_pos) = (self.parent, self.pos);
 499             self.parent = doc;
 500             self.pos = self.parent.start;
 501
 502             let result = try!(f(self, idx));
 503
 504             self.parent = old_parent;
 505             self.pos = old_pos;
 506             Ok(result)
 507         }
 508
 509         fn read_enum_struct_variant_field<T>(&mut self,
 510                                              name: &str,
 511                                              idx: uint,
 512                                              f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 513                                              -> DecodeResult<T> {
 514             debug!("read_enum_struct_variant_arg(name={}, idx={})", name, idx);
 515             f(self)
 516         }
 517
 518         fn read_struct<T>(&mut self,
 519                           name: &str,
 520                           _: uint,
 521                           f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 522                           -> DecodeResult<T> {
 523             debug!("read_struct(name={})", name);
 524             f(self)
 525         }
 526
 527         fn read_struct_field<T>(&mut self,
 528                                 name: &str,
 529                                 idx: uint,
 530                                 f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 531                                 -> DecodeResult<T> {
 532             debug!("read_struct_field(name={}, idx={})", name, idx);
 533             try!(self._check_label(name));
 534             f(self)
 535         }
 536
 537         fn read_tuple<T>(&mut self,
 538                          f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>) -> DecodeResult<T> {
 539             debug!("read_tuple()");
 540             self.read_seq(f)
 541         }
 542
 543         fn read_tuple_arg<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 544                              -> DecodeResult<T> {
 545             debug!("read_tuple_arg(idx={})", idx);
 546             self.read_seq_elt(idx, f)
 547         }
 548
 549         fn read_tuple_struct<T>(&mut self,
 550                                 name: &str,
 551                                 f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>)
 552                                 -> DecodeResult<T> {
 553             debug!("read_tuple_struct(name={})", name);
 554             self.read_tuple(f)
 555         }
 556
 557         fn read_tuple_struct_arg<T>(&mut self,
 558                                     idx: uint,
 559                                     f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 560                                     -> DecodeResult<T> {
 561             debug!("read_tuple_struct_arg(idx={})", idx);
 562             self.read_tuple_arg(idx, f)
 563         }
 564
 565         fn read_option<T>(&mut self,
 566                           f: |&mut Decoder<'doc>, bool| -> DecodeResult<T>) -> DecodeResult<T> {
 567             debug!("read_option()");
 568             self.read_enum("Option", |this| {
 569                 this.read_enum_variant(["None", "Some"], |this, idx| {
 570                     match idx {
 571                         0 => f(this, false),
 572                         1 => f(this, true),
 573                         _ => Err(Expected(format!("Expected None or Some"))),
 574                     }
 575                 })
 576             })
 577         }
 578
 579         fn read_seq<T>(&mut self,
 580                        f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>) -> DecodeResult<T> {
 581             debug!("read_seq()");
 582             self.push_doc(EsVec, |d| {
 583                 let len = try!(d._next_uint(EsVecLen));
 584                 debug!("  len={}", len);
 585                 f(d, len)
 586             })
 587         }
 588
 589         fn read_seq_elt<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 590                            -> DecodeResult<T> {
 591             debug!("read_seq_elt(idx={})", idx);
 592             self.push_doc(EsVecElt, f)
 593         }
 594
 595         fn read_map<T>(&mut self,
 596                        f: |&mut Decoder<'doc>, uint| -> DecodeResult<T>) -> DecodeResult<T> {
 597             debug!("read_map()");
 598             self.push_doc(EsMap, |d| {
 599                 let len = try!(d._next_uint(EsMapLen));
 600                 debug!("  len={}", len);
 601                 f(d, len)
 602             })
 603         }
 604
 605         fn read_map_elt_key<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 606                                -> DecodeResult<T> {
 607             debug!("read_map_elt_key(idx={})", idx);
 608             self.push_doc(EsMapKey, f)
 609         }
 610
 611         fn read_map_elt_val<T>(&mut self, idx: uint, f: |&mut Decoder<'doc>| -> DecodeResult<T>)
 612                                -> DecodeResult<T> {
 613             debug!("read_map_elt_val(idx={})", idx);
 614             self.push_doc(EsMapVal, f)
 615         }
 616     }
 617 }
 618
 619 pub mod writer {
 620     use std::cast;
 621     use std::clone::Clone;
 622     use std::io;
 623     use std::io::{Writer, Seek};
 624     use std::io::extensions::u64_to_be_bytes;
 625
 626     use super::{ EsVec, EsMap, EsEnum, EsVecLen, EsVecElt, EsMapLen, EsMapKey,
 627         EsEnumVid, EsU64, EsU32, EsU16, EsU8, EsInt, EsI64, EsI32, EsI16, EsI8,
 628         EsBool, EsF64, EsF32, EsChar, EsStr, EsMapVal, EsEnumBody, EsUint,
 629         EsOpaque, EsLabel, EbmlEncoderTag };
 630
 631     use serialize;
 632
 633
    /// Result of an encoding step: nothing on success, or the I/O error
    /// reported while writing.
    pub type EncodeResult = io::IoResult<()>;
 635
 636     // ebml writing
    /// Writes tagged EBML elements to a seekable writer.
    pub struct Encoder<'a, W> {
        /// Destination of the encoded bytes.
        writer: &'a mut W,
        /// Stack of writer offsets at which the size placeholder of each
        /// currently open tag was written (`start_tag` pushes, `end_tag`
        /// pops).
        priv size_positions: ~[uint],
    }
 641
 642     fn write_sized_vuint<W: Writer>(w: &mut W, n: uint, size: uint) -> EncodeResult {
 643         match size {
 644             1u => w.write(&[0x80u8 | (n as u8)]),
 645             2u => w.write(&[0x40u8 | ((n >> 8_u) as u8), n as u8]),
 646             3u => w.write(&[0x20u8 | ((n >> 16_u) as u8), (n >> 8_u) as u8,
 647                             n as u8]),
 648             4u => w.write(&[0x10u8 | ((n >> 24_u) as u8), (n >> 16_u) as u8,
 649                             (n >> 8_u) as u8, n as u8]),
 650             _ => Err(io::IoError {
 651                 kind: io::OtherIoError,
 652                 desc: "int too big",
 653                 detail: Some(format!("{}", n))
 654             })
 655         }
 656     }
 657
 658     fn write_vuint<W: Writer>(w: &mut W, n: uint) -> EncodeResult {
 659         if n < 0x7f_u { return write_sized_vuint(w, n, 1u); }
 660         if n < 0x4000_u { return write_sized_vuint(w, n, 2u); }
 661         if n < 0x200000_u { return write_sized_vuint(w, n, 3u); }
 662         if n < 0x10000000_u { return write_sized_vuint(w, n, 4u); }
 663         Err(io::IoError {
 664             kind: io::OtherIoError,
 665             desc: "int too big",
 666             detail: Some(format!("{}", n))
 667         })
 668     }
 669
 670     pub fn Encoder<'a, W: Writer + Seek>(w: &'a mut W) -> Encoder<'a, W> {
 671         let size_positions: ~[uint] = ~[];
 672         Encoder {
 673             writer: w,
 674             size_positions: size_positions,
 675         }
 676     }
 677
 678     // FIXME (#2741): Provide a function to write the standard ebml header.
 679     impl<'a, W: Writer + Seek> Encoder<'a, W> {
 680         /// FIXME(pcwalton): Workaround for badness in trans. DO NOT USE ME.
 681         pub unsafe fn unsafe_clone(&self) -> Encoder<'a, W> {
 682             Encoder {
 683                 writer: cast::transmute_copy(&self.writer),
 684                 size_positions: self.size_positions.clone(),
 685             }
 686         }
 687
 688         pub fn start_tag(&mut self, tag_id: uint) -> EncodeResult {
 689             debug!("Start tag {}", tag_id);
 690
 691             // Write the enum ID:
 692             try!(write_vuint(self.writer, tag_id));
 693
 694             // Write a placeholder four-byte size.
 695             self.size_positions.push(try!(self.writer.tell()) as uint);
 696             let zeroes: &[u8] = &[0u8, 0u8, 0u8, 0u8];
 697             self.writer.write(zeroes)
 698         }
 699
 700         pub fn end_tag(&mut self) -> EncodeResult {
 701             let last_size_pos = self.size_positions.pop().unwrap();
 702             let cur_pos = try!(self.writer.tell());
 703             try!(self.writer.seek(last_size_pos as i64, io::SeekSet));
 704             let size = cur_pos as uint - last_size_pos - 4;
 705             try!(write_sized_vuint(self.writer, size, 4u));
 706             let r = try!(self.writer.seek(cur_pos as i64, io::SeekSet));
 707
 708             debug!("End tag (size = {})", size);
 709             Ok(r)
 710         }
 711
 712         pub fn wr_tag(&mut self, tag_id: uint, blk: || -> EncodeResult) -> EncodeResult {
 713             try!(self.start_tag(tag_id));
 714             try!(blk());
 715             self.end_tag()
 716         }
 717
 718         pub fn wr_tagged_bytes(&mut self, tag_id: uint, b: &[u8]) -> EncodeResult {
 719             try!(write_vuint(self.writer, tag_id));
 720             try!(write_vuint(self.writer, b.len()));
 721             self.writer.write(b)
 722         }
 723
 724         pub fn wr_tagged_u64(&mut self, tag_id: uint, v: u64) -> EncodeResult {
 725             u64_to_be_bytes(v, 8u, |v| {
 726                 self.wr_tagged_bytes(tag_id, v)
 727             })
 728         }
 729
 730         pub fn wr_tagged_u32(&mut self, tag_id: uint, v: u32)  -> EncodeResult{
 731             u64_to_be_bytes(v as u64, 4u, |v| {
 732                 self.wr_tagged_bytes(tag_id, v)
 733             })
 734         }
 735
 736         pub fn wr_tagged_u16(&mut self, tag_id: uint, v: u16) -> EncodeResult {
 737             u64_to_be_bytes(v as u64, 2u, |v| {
 738                 self.wr_tagged_bytes(tag_id, v)
 739             })
 740         }
 741
 742         pub fn wr_tagged_u8(&mut self, tag_id: uint, v: u8) -> EncodeResult {
 743             self.wr_tagged_bytes(tag_id, &[v])
 744         }
 745
 746         pub fn wr_tagged_i64(&mut self, tag_id: uint, v: i64) -> EncodeResult {
 747             u64_to_be_bytes(v as u64, 8u, |v| {
 748                 self.wr_tagged_bytes(tag_id, v)
 749             })
 750         }
 751
 752         pub fn wr_tagged_i32(&mut self, tag_id: uint, v: i32) -> EncodeResult {
 753             u64_to_be_bytes(v as u64, 4u, |v| {
 754                 self.wr_tagged_bytes(tag_id, v)
 755             })
 756         }
 757
 758         pub fn wr_tagged_i16(&mut self, tag_id: uint, v: i16) -> EncodeResult {
 759             u64_to_be_bytes(v as u64, 2u, |v| {
 760                 self.wr_tagged_bytes(tag_id, v)
 761             })
 762         }
 763
 764         pub fn wr_tagged_i8(&mut self, tag_id: uint, v: i8) -> EncodeResult {
 765             self.wr_tagged_bytes(tag_id, &[v as u8])
 766         }
 767
 768         pub fn wr_tagged_str(&mut self, tag_id: uint, v: &str) -> EncodeResult {
 769             self.wr_tagged_bytes(tag_id, v.as_bytes())
 770         }
 771
 772         pub fn wr_bytes(&mut self, b: &[u8]) -> EncodeResult {
 773             debug!("Write {} bytes", b.len());
 774             self.writer.write(b)
 775         }
 776
 777         pub fn wr_str(&mut self, s: &str) -> EncodeResult {
 778             debug!("Write str: {}", s);
 779             self.writer.write(s.as_bytes())
 780         }
 781     }
 782
 783     // FIXME (#2743): optionally perform "relaxations" on end_tag to more
 784     // efficiently encode sizes; this is a fixed point iteration
 785
    // Set to true to generate more debugging in EBML code.
    // Totally lame approach.
    // When true, `_emit_label` writes an `EsLabel` element for each label;
    // when false it writes nothing.
    static DEBUG: bool = true;
 789
 790     impl<'a, W: Writer + Seek> Encoder<'a, W> {
 791         // used internally to emit things like the vector length and so on
 792         fn _emit_tagged_uint(&mut self, t: EbmlEncoderTag, v: uint) -> EncodeResult {
 793             assert!(v <= 0xFFFF_FFFF_u);
 794             self.wr_tagged_u32(t as uint, v as u32)
 795         }
 796
 797         fn _emit_label(&mut self, label: &str) -> EncodeResult {
 798             // There are various strings that we have access to, such as
 799             // the name of a record field, which do not actually appear in
 800             // the encoded EBML (normally).  This is just for
 801             // efficiency.  When debugging, though, we can emit such
 802             // labels and then they will be checked by decoder to
 803             // try and check failures more quickly.
 804             if DEBUG { self.wr_tagged_str(EsLabel as uint, label) }
 805             else { Ok(()) }
 806         }
 807
 808         pub fn emit_opaque(&mut self, f: |&mut Encoder<W>| -> EncodeResult) -> EncodeResult {
 809             try!(self.start_tag(EsOpaque as uint));
 810             try!(f(self));
 811             self.end_tag()
 812         }
 813     }
 814
 815     impl<'a, W: Writer + Seek> serialize::Encoder<io::IoError> for Encoder<'a, W> {
 816         fn emit_nil(&mut self) -> EncodeResult {
 817             Ok(())
 818         }
 819
 820         fn emit_uint(&mut self, v: uint) -> EncodeResult {
 821             self.wr_tagged_u64(EsUint as uint, v as u64)
 822         }
 823         fn emit_u64(&mut self, v: u64) -> EncodeResult {
 824             self.wr_tagged_u64(EsU64 as uint, v)
 825         }
 826         fn emit_u32(&mut self, v: u32) -> EncodeResult {
 827             self.wr_tagged_u32(EsU32 as uint, v)
 828         }
 829         fn emit_u16(&mut self, v: u16) -> EncodeResult {
 830             self.wr_tagged_u16(EsU16 as uint, v)
 831         }
 832         fn emit_u8(&mut self, v: u8) -> EncodeResult {
 833             self.wr_tagged_u8(EsU8 as uint, v)
 834         }
 835
 836         fn emit_int(&mut self, v: int) -> EncodeResult {
 837             self.wr_tagged_i64(EsInt as uint, v as i64)
 838         }
 839         fn emit_i64(&mut self, v: i64) -> EncodeResult {
 840             self.wr_tagged_i64(EsI64 as uint, v)
 841         }
 842         fn emit_i32(&mut self, v: i32) -> EncodeResult {
 843             self.wr_tagged_i32(EsI32 as uint, v)
 844         }
 845         fn emit_i16(&mut self, v: i16) -> EncodeResult {
 846             self.wr_tagged_i16(EsI16 as uint, v)
 847         }
 848         fn emit_i8(&mut self, v: i8) -> EncodeResult {
 849             self.wr_tagged_i8(EsI8 as uint, v)
 850         }
 851
 852         fn emit_bool(&mut self, v: bool) -> EncodeResult {
 853             self.wr_tagged_u8(EsBool as uint, v as u8)
 854         }
 855
 856         fn emit_f64(&mut self, v: f64) -> EncodeResult {
 857             let bits = unsafe { cast::transmute(v) };
 858             self.wr_tagged_u64(EsF64 as uint, bits)
 859         }
 860         fn emit_f32(&mut self, v: f32) -> EncodeResult {
 861             let bits = unsafe { cast::transmute(v) };
 862             self.wr_tagged_u32(EsF32 as uint, bits)
 863         }
 864         fn emit_char(&mut self, v: char) -> EncodeResult {
 865             self.wr_tagged_u32(EsChar as uint, v as u32)
 866         }
 867
 868         fn emit_str(&mut self, v: &str) -> EncodeResult {
 869             self.wr_tagged_str(EsStr as uint, v)
 870         }
 871
 872         fn emit_enum(&mut self,
 873                      name: &str,
 874                      f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 875             try!(self._emit_label(name));
 876             try!(self.start_tag(EsEnum as uint));
 877             try!(f(self));
 878             self.end_tag()
 879         }
 880
 881         fn emit_enum_variant(&mut self,
 882                              _: &str,
 883                              v_id: uint,
 884                              _: uint,
 885                              f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 886             try!(self._emit_tagged_uint(EsEnumVid, v_id));
 887             try!(self.start_tag(EsEnumBody as uint));
 888             try!(f(self));
 889             self.end_tag()
 890         }
 891
 892         fn emit_enum_variant_arg(&mut self,
 893                                  _: uint,
 894                                  f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 895             f(self)
 896         }
 897
 898         fn emit_enum_struct_variant(&mut self,
 899                                     v_name: &str,
 900                                     v_id: uint,
 901                                     cnt: uint,
 902                                     f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 903             self.emit_enum_variant(v_name, v_id, cnt, f)
 904         }
 905
 906         fn emit_enum_struct_variant_field(&mut self,
 907                                           _: &str,
 908                                           idx: uint,
 909                                           f: |&mut Encoder<'a, W>| -> EncodeResult)
 910             -> EncodeResult {
 911             self.emit_enum_variant_arg(idx, f)
 912         }
 913
 914         fn emit_struct(&mut self,
 915                        _: &str,
 916                        _len: uint,
 917                        f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 918             f(self)
 919         }
 920
 921         fn emit_struct_field(&mut self,
 922                              name: &str,
 923                              _: uint,
 924                              f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 925             try!(self._emit_label(name));
 926             f(self)
 927         }
 928
 929         fn emit_tuple(&mut self,
 930                       len: uint,
 931                       f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 932             self.emit_seq(len, f)
 933         }
 934         fn emit_tuple_arg(&mut self,
 935                           idx: uint,
 936                           f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 937             self.emit_seq_elt(idx, f)
 938         }
 939
 940         fn emit_tuple_struct(&mut self,
 941                              _: &str,
 942                              len: uint,
 943                              f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 944             self.emit_seq(len, f)
 945         }
 946         fn emit_tuple_struct_arg(&mut self,
 947                                  idx: uint,
 948                                  f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 949             self.emit_seq_elt(idx, f)
 950         }
 951
 952         fn emit_option(&mut self,
 953                        f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 954             self.emit_enum("Option", f)
 955         }
 956         fn emit_option_none(&mut self) -> EncodeResult {
 957             self.emit_enum_variant("None", 0, 0, |_| Ok(()))
 958         }
 959         fn emit_option_some(&mut self,
 960                             f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 961
 962             self.emit_enum_variant("Some", 1, 1, f)
 963         }
 964
 965         fn emit_seq(&mut self,
 966                     len: uint,
 967                     f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 968
 969             try!(self.start_tag(EsVec as uint));
 970             try!(self._emit_tagged_uint(EsVecLen, len));
 971             try!(f(self));
 972             self.end_tag()
 973         }
 974
 975         fn emit_seq_elt(&mut self,
 976                         _idx: uint,
 977                         f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 978
 979             try!(self.start_tag(EsVecElt as uint));
 980             try!(f(self));
 981             self.end_tag()
 982         }
 983
 984         fn emit_map(&mut self,
 985                     len: uint,
 986                     f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 987
 988             try!(self.start_tag(EsMap as uint));
 989             try!(self._emit_tagged_uint(EsMapLen, len));
 990             try!(f(self));
 991             self.end_tag()
 992         }
 993
 994         fn emit_map_elt_key(&mut self,
 995                             _idx: uint,
 996                             f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
 997
 998             try!(self.start_tag(EsMapKey as uint));
 999             try!(f(self));
1000             self.end_tag()
1001         }
1002
1003         fn emit_map_elt_val(&mut self,
1004                             _idx: uint,
1005                             f: |&mut Encoder<'a, W>| -> EncodeResult) -> EncodeResult {
1006             try!(self.start_tag(EsMapVal as uint));
1007             try!(f(self));
1008             self.end_tag()
1009         }
1010     }
1011 }
1012
1013 // ___________________________________________________________________________
1014 // Testing
1015
#[cfg(test)]
mod tests {
    use ebml::reader;
    use ebml::writer;
    use {Encodable, Decodable};

    use std::io::MemWriter;
    use std::option::{None, Option, Some};

    // Decodes eight back-to-back variable-length uints (the smallest and
    // largest value for each of the 1-, 2-, 3- and 4-byte forms) and checks
    // both the decoded value and the offset at which the next one starts.
    #[test]
    fn test_vuint_at() {
        let data = [
            0x80,
            0xff,
            0x40, 0x00,
            0x7f, 0xff,
            0x20, 0x00, 0x00,
            0x3f, 0xff, 0xff,
            0x10, 0x00, 0x00, 0x00,
            0x1f, 0xff, 0xff, 0xff
        ];

        // (expected value, expected offset of the following vuint)
        let expected = [
            // Class A
            (0u, 1u),
            ((1u << 7) - 1, 2u),
            // Class B
            (0u, 4u),
            ((1u << 14) - 1, 6u),
            // Class C
            (0u, 9u),
            ((1u << 21) - 1, 12u),
            // Class D
            (0u, 16u),
            ((1u << 28) - 1, 20u)
        ];

        // Each read starts where the previous one ended.
        let mut pos = 0u;
        for &(want_val, want_next) in expected.iter() {
            let res = reader::vuint_at(data, pos).unwrap();
            assert_eq!(res.val, want_val);
            assert_eq!(res.next, want_next);
            pos = res.next;
        }
    }

    // Encodes an `Option<int>` into an in-memory buffer, decodes it again and
    // checks that the value survives the round trip.
    #[test]
    fn test_option_int() {
        fn test_v(v: Option<int>) {
            debug!("v == {:?}", v);
            let mut buf = MemWriter::new();
            {
                // Scoped so the encoder's borrow of `buf` ends before reading.
                let mut encoder = writer::Encoder(&mut buf);
                let _ = v.encode(&mut encoder);
            }
            let doc = reader::Doc(buf.get_ref());
            let mut decoder = reader::Decoder(doc);
            let decoded: Option<int> = Decodable::decode(&mut decoder).unwrap();
            debug!("v1 == {:?}", decoded);
            assert_eq!(v, decoded);
        }

        test_v(Some(22));
        test_v(None);
        test_v(Some(3));
    }
}
1094
#[cfg(test)]
mod bench {
    extern crate test;
    use self::test::BenchHarness;
    use ebml::reader;
    use std::slice;

    // Each benchmark builds a buffer in which a vuint starts every 4 bytes
    // (at offset 0 for the "aligned" variants, offset 1 for the "unaligned"
    // ones) and repeatedly decodes all of them, accumulating the values.
    // NOTE(review): the accumulated total is never read after the loop.

    // 1-byte (class A) vuints starting at offsets 0, 4, 8, ...
    #[bench]
    pub fn vuint_at_A_aligned(bh: &mut BenchHarness) {
        let bytes = slice::from_fn(4 * 100, |idx| {
            if idx % 2 == 0 { 0x80u8 } else { idx as u8 }
        });
        let mut total = 0u;
        bh.iter(|| {
            let mut offset = 0;
            while offset < bytes.len() {
                let decoded = reader::vuint_at(bytes, offset).unwrap();
                total += decoded.val;
                offset += 4;
            }
        });
    }

    // 1-byte (class A) vuints starting at offsets 1, 5, 9, ...
    #[bench]
    pub fn vuint_at_A_unaligned(bh: &mut BenchHarness) {
        let bytes = slice::from_fn(4 * 100 + 1, |idx| {
            if idx % 2 == 1 { 0x80u8 } else { idx as u8 }
        });
        let mut total = 0u;
        bh.iter(|| {
            let mut offset = 1;
            while offset < bytes.len() {
                let decoded = reader::vuint_at(bytes, offset).unwrap();
                total += decoded.val;
                offset += 4;
            }
        });
    }

    // 4-byte (class D) vuints starting at offsets 0, 4, 8, ...
    #[bench]
    pub fn vuint_at_D_aligned(bh: &mut BenchHarness) {
        let bytes = slice::from_fn(4 * 100, |idx| {
            let lane = idx % 4;
            if lane == 0 {
                0x10u8
            } else if lane == 3 {
                idx as u8
            } else {
                0u8
            }
        });
        let mut total = 0u;
        bh.iter(|| {
            let mut offset = 0;
            while offset < bytes.len() {
                let decoded = reader::vuint_at(bytes, offset).unwrap();
                total += decoded.val;
                offset += 4;
            }
        });
    }

    // 4-byte (class D) vuints starting at offsets 1, 5, 9, ...
    #[bench]
    pub fn vuint_at_D_unaligned(bh: &mut BenchHarness) {
        let bytes = slice::from_fn(4 * 100 + 1, |idx| {
            let lane = idx % 4;
            if lane == 1 {
                0x10u8
            } else if lane == 0 {
                idx as u8
            } else {
                0u8
            }
        });
        let mut total = 0u;
        bh.iter(|| {
            let mut offset = 1;
            while offset < bytes.len() {
                let decoded = reader::vuint_at(bytes, offset).unwrap();
                total += decoded.val;
                offset += 4;
            }
        });
    }
}