// option. This file may not be copied, modified, or distributed
// except according to those terms.
+use middle::def_id::{DefId, DefIndex};
+use rbml;
+use rbml::writer::Encoder;
use std::io::{Cursor, Write};
use std::slice;
use std::u32;
-use syntax::ast::NodeId;
-#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)]
-pub struct IndexEntry {
- pub node: NodeId,
- pub pos: u64
-}
-
-#[derive(Debug)]
-pub struct IndexArrayEntry {
- bits: u32,
- first_pos: u32
+/// As part of the metadata, we generate an index that stores, for
+/// each DefIndex, the position of the corresponding RBML document (if
+/// any). This is just a big `[u32]` slice, where an entry of
+/// `u32::MAX` indicates that there is no RBML document. This little
+/// struct just stores the offsets within the metadata of the start
+/// and end of this slice. These are actually part of an RBML
+/// document, but for looking things up in the metadata, we just
+/// discard the RBML positioning and jump directly to the data.
+pub struct Index {
+ data_start: usize,
+ data_end: usize,
}
-impl IndexArrayEntry {
- fn encode_to<W: Write>(&self, b: &mut W) {
- write_be_u32(b, self.bits);
- write_be_u32(b, self.first_pos);
+impl Index {
+ /// Given the RBML doc representing the index, save the offests
+ /// for later.
+ pub fn from_rbml(index: rbml::Doc) -> Index {
+ Index { data_start: index.start, data_end: index.end }
}
- fn decode_from(b: &[u32]) -> Self {
- IndexArrayEntry {
- bits: b[0].to_be(),
- first_pos: b[1].to_be()
+ /// Given the metadata, extract out the offset of a particular
+ /// DefIndex (if any).
+ #[inline(never)]
+ pub fn lookup_item(&self, bytes: &[u8], def_index: DefIndex) -> Option<u32> {
+ let words = bytes_to_words(&bytes[self.data_start..self.data_end]);
+ let index = def_index.as_usize();
+
+ debug!("lookup_item: index={:?} words.len={:?}",
+ index, words.len());
+
+ let position = u32::from_be(words[index]);
+ if position == u32::MAX {
+ debug!("lookup_item: position=u32::MAX");
+ None
+ } else {
+ debug!("lookup_item: position={:?}", position);
+ Some(position)
}
}
}
-/// The Item Index
-///
-/// This index maps the NodeId of each item to its location in the
-/// metadata.
-///
-/// The index is a sparse bit-vector consisting of a index-array
-/// and a position-array. Each entry in the index-array handles 32 nodes.
-/// The first word is a bit-array consisting of the nodes that hold items,
-/// the second is the index of the first of the items in the position-array.
-/// If there is a large set of non-item trailing nodes, they can be omitted
-/// from the index-array.
-///
-/// The index is serialized as an array of big-endian 32-bit words.
-/// The first word is the number of items in the position-array.
-/// Then, for each item, its position in the metadata follows.
-/// After that the index-array is stored.
-///
-/// struct index {
-/// u32 item_count;
-/// u32 items[self.item_count];
-/// struct { u32 bits; u32 offset; } positions[..];
-/// }
-pub struct Index {
- position_start: usize,
- index_start: usize,
- index_end: usize,
+/// While we are generating the metadata, we also track the position
+/// of each DefIndex. It is not required that all definitions appear
+/// in the metadata, nor that they are serialized in order, and
+/// therefore we first allocate the vector here and fill it with
+/// `u32::MAX`. Whenever an index is visited, we fill in the
+/// appropriate spot by calling `record_position`. We should never
+/// visit the same index twice.
+pub struct IndexData {
+ positions: Vec<u32>,
}
-pub fn write_index(mut entries: Vec<IndexEntry>, buf: &mut Cursor<Vec<u8>>) {
- assert!(entries.len() < u32::MAX as usize);
- entries.sort();
-
- let mut last_entry = IndexArrayEntry { bits: 0, first_pos: 0 };
-
- write_be_u32(buf, entries.len() as u32);
- for &IndexEntry { pos, .. } in &entries {
- assert!(pos < u32::MAX as u64);
- write_be_u32(buf, pos as u32);
+impl IndexData {
+ pub fn new(max_index: usize) -> IndexData {
+ IndexData {
+ positions: vec![u32::MAX; max_index]
+ }
}
- let mut pos_in_index_array = 0;
- for (i, &IndexEntry { node, .. }) in entries.iter().enumerate() {
- let (x, s) = (node / 32 as u32, node % 32 as u32);
- while x > pos_in_index_array {
- pos_in_index_array += 1;
- last_entry.encode_to(buf);
- last_entry = IndexArrayEntry { bits: 0, first_pos: i as u32 };
- }
- last_entry.bits |= 1<<s;
+ pub fn record(&mut self, def_id: DefId, encoder: &mut Encoder) {
+ assert!(def_id.is_local());
+ self.record_index(def_id.index, encoder)
}
- last_entry.encode_to(buf);
- info!("write_index: {} items, {} array entries",
- entries.len(), pos_in_index_array);
-}
+ pub fn record_index(&mut self, item: DefIndex, encoder: &mut Encoder) {
+ let item = item.as_usize();
-impl Index {
- fn lookup_index(&self, index: &[u32], i: u32) -> Option<IndexArrayEntry> {
- let ix = (i as usize)*2;
- if ix >= index.len() {
- None
- } else {
- Some(IndexArrayEntry::decode_from(&index[ix..ix+2]))
- }
- }
+ let position = encoder.mark_stable_position();
- fn item_from_pos(&self, positions: &[u32], pos: u32) -> u32 {
- positions[pos as usize].to_be()
- }
+ assert!(position < (u32::MAX as u64));
+ let position = position as u32;
- #[inline(never)]
- pub fn lookup_item(&self, buf: &[u8], node: NodeId) -> Option<u32> {
- let index = bytes_to_words(&buf[self.index_start..self.index_end]);
- let positions = bytes_to_words(&buf[self.position_start..self.index_start]);
- let (x, s) = (node / 32 as u32, node % 32 as u32);
- let result = match self.lookup_index(index, x) {
- Some(IndexArrayEntry { bits, first_pos }) => {
- let bit = 1<<s;
- if bits & bit == 0 {
- None
- } else {
- let prev_nodes_for_entry = (bits&(bit-1)).count_ones();
- Some(self.item_from_pos(
- positions,
- first_pos+prev_nodes_for_entry))
- }
- }
- None => None // trailing zero
- };
- debug!("lookup_item({:?}) = {:?}", node, result);
- result
+ assert!(self.positions[item] == u32::MAX,
+ "recorded position for item {:?} twice, first at {:?} and now at {:?}",
+ item, self.positions[item], position);
+
+ self.positions[item] = position;
}
- pub fn from_buf(buf: &[u8], start: usize, end: usize) -> Self {
- let buf = bytes_to_words(&buf[start..end]);
- let position_count = buf[0].to_be() as usize;
- let position_len = position_count*4;
- info!("loaded index - position: {}-{}-{}", start, start+position_len, end);
- debug!("index contents are {:?}",
- buf.iter().map(|b| format!("{:08x}", b)).collect::<Vec<_>>().concat());
- assert!(end-4-start >= position_len);
- assert_eq!((end-4-start-position_len)%8, 0);
- Index {
- position_start: start+4,
- index_start: start+position_len+4,
- index_end: end
+ pub fn write_index(&self, buf: &mut Cursor<Vec<u8>>) {
+ for &position in &self.positions {
+ write_be_u32(buf, position);
}
}
}
assert!(b.len() % 4 == 0);
unsafe { slice::from_raw_parts(b.as_ptr() as *const u32, b.len()/4) }
}
-
-#[test]
-fn test_index() {
- let entries = vec![
- IndexEntry { node: 0, pos: 17 },
- IndexEntry { node: 31, pos: 29 },
- IndexEntry { node: 32, pos: 1175 },
- IndexEntry { node: 191, pos: 21 },
- IndexEntry { node: 128, pos: 34 },
- IndexEntry { node: 145, pos: 70 },
- IndexEntry { node: 305, pos: 93214 },
- IndexEntry { node: 138, pos: 64 },
- IndexEntry { node: 129, pos: 53 },
- IndexEntry { node: 192, pos: 33334 },
- IndexEntry { node: 200, pos: 80123 },
- ];
- let mut c = Cursor::new(vec![]);
- write_index(entries.clone(), &mut c);
- let mut buf = c.into_inner();
- let expected: &[u8] = &[
- 0, 0, 0, 11, // # entries
- // values:
- 0,0,0,17, 0,0,0,29, 0,0,4,151, 0,0,0,34,
- 0,0,0,53, 0,0,0,64, 0,0,0,70, 0,0,0,21,
- 0,0,130,54, 0,1,56,251, 0,1,108,30,
- // index:
- 128,0,0,1,0,0,0,0, 0,0,0,1,0,0,0,2,
- 0,0,0,0,0,0,0,3, 0,0,0,0,0,0,0,3,
- 0,2,4,3,0,0,0,3, 128,0,0,0,0,0,0,7,
- 0,0,1,1,0,0,0,8, 0,0,0,0,0,0,0,10,
- 0,0,0,0,0,0,0,10, 0,2,0,0,0,0,0,10
- ];
- assert_eq!(buf, expected);
-
- // insert some junk padding
- for i in 0..17 { buf.insert(0, i); buf.push(i) }
- let index = Index::from_buf(&buf, 17, buf.len()-17);
-
- // test round-trip
- for i in 0..4096 {
- assert_eq!(index.lookup_item(&buf, i),
- entries.iter().find(|e| e.node == i).map(|n| n.pos as u32));
- }
-}