2 use crate::raw_emitter::RawEmitter;
3 use std::convert::TryInto;
4 use std::fmt::Write as _;
/// Header for one run of short offsets.
///
/// `pack` places `start_idx` in the bits above bit 20 and `prefix_sum` in the
/// low 21 bits of a single u32 (11 + 21 = 32 bits).
///
7 /// This will get packed into a single u32 before inserting into the data set.
8 #[derive(Debug, PartialEq)]
9 struct ShortOffsetRunHeader {
// NOTE(review): the field declarations are not visible in this excerpt. Going
// by the asserts in `pack`, the 21-bit note below presumably documents
// `prefix_sum` and the 11-bit note documents `start_idx` -- confirm.
10 /// Note, we only allow for 21 bits here.
13 /// Note, we actually only allow for 11 bits here. This should be enough --
14 /// our largest sets are around ~1400 offsets long.
18 impl ShortOffsetRunHeader {
/// Packs this header into a single u32.
///
/// `start_idx` is shifted left by 21 bits and `prefix_sum` is OR-ed into the
/// low 21 bits.
///
/// # Panics
///
/// Panics if `start_idx` is not below `1 << 11` or `prefix_sum` is not below
/// `1 << 21`, i.e. if either field would not fit in its share of the u32.
19 fn pack(&self) -> u32 {
// Range checks: 11 bits for the index plus 21 bits for the sum fill the
// 32-bit result exactly, so neither field may exceed its width.
20 assert!(self.start_idx < (1 << 11));
21 assert!(self.prefix_sum < (1 << 21));
// `<<` binds tighter than `|`, so this is `(start_idx << 21) | prefix_sum`.
23 (self.start_idx as u32) << 21 | self.prefix_sum
/// Emits the skip-list encoding of `ranges`.
///
/// From the visible code: the start/end points of every range are flattened
/// into one list, offsets are derived from them and terminated with a
/// `char::MAX + 1` sentinel, and the offsets are split into a u8 `OFFSETS`
/// table plus a `SHORT_OFFSET_RUNS` table of packed `ShortOffsetRunHeader`s.
/// The byte size of both tables is added to `self.bytes_used`, and a
/// `lookup(c: char) -> bool` function that calls `super::skip_search` on the
/// two tables is written to `self.file`.
///
/// # Panics
///
/// Panics if a run start index does not fit the `start_idx` field type, if
/// `ShortOffsetRunHeader::pack` rejects a header, or if writing to `self.file`
/// fails (every visible `writeln!` result is unwrapped).
///
/// NOTE(review): several lines of this function are not visible in this
/// excerpt (loop headers, else branches, the heads of the two table writes).
/// Comments that depend on them are marked as assumptions to confirm.
28 pub fn emit_skiplist(&mut self, ranges: &[Range<u32>]) {
29 let mut offsets = Vec::<u32>::new();
// Flatten every range into its two endpoints: start, end, start, end, ...
30 let points = ranges.iter().flat_map(|r| vec![r.start, r.end]).collect::<Vec<u32>>();
// Distance from `offset` to the current point. Presumably `offset` is the
// previous point and `delta` is what gets pushed onto `offsets`; the
// surrounding loop is not visible here -- confirm.
33 let delta = pt - offset;
37 // Guaranteed to terminate, as it's impossible to subtract a value this
38 // large from a valid char.
39 offsets.push(std::char::MAX as u32 + 1);
// Offsets small enough for a u8 are stored here.
40 let mut coded_offsets: Vec<u8> = Vec::new();
// One header is recorded for each offset that does not fit in a u8.
41 let mut short_offset_runs: Vec<ShortOffsetRunHeader> = vec![];
42 let mut iter = offsets.iter().cloned();
// Presumably the running total of the offsets consumed so far; the line
// that updates it is not visible here -- confirm.
43 let mut prefix_sum = 0;
// NOTE(review): the uses of these two flags are not visible; they look like
// per-run bookkeeping (saw any offset / pushed a header) -- confirm.
45 let mut any_elements = false;
46 let mut inserted = false;
// Index in `coded_offsets` at which the current run begins.
47 let start = coded_offsets.len();
// `by_ref` borrows `iter` instead of consuming it, so the same iterator can
// be resumed after this `for` ends.
48 for offset in iter.by_ref() {
// The conversion targets u8 (the element type of `coded_offsets`), so it
// succeeds only for offsets up to 255.
51 if let Ok(offset) = offset.try_into() {
52 coded_offsets.push(offset);
// Presumably the else branch of the conversion above (the `else` line is not
// visible): the offset is too large for a u8, so a run header is recorded.
54 short_offset_runs.push(ShortOffsetRunHeader {
55 start_idx: start.try_into().unwrap(),
// The oversized offset is represented in `coded_offsets` by a 0 placeholder.
58 // This is just needed to maintain indices even/odd
60 coded_offsets.push(0);
68 // We always append the huge char::MAX offset to the end which
69 // should never be able to fit into the u8 offsets.
// Format string for the packed header table; each header is packed to a u32.
// Presumably written to `self.file` like the lookup function below -- the
// head of this write call is not visible.
75 "static SHORT_OFFSET_RUNS: [u32; {}] = [{}];",
76 short_offset_runs.len(),
77 fmt_list(short_offset_runs.iter().map(|v| v.pack()))
// Each packed header is a u32, i.e. 4 bytes.
80 self.bytes_used += 4 * short_offset_runs.len();
// Format string for the u8 offsets table (same caveat as above).
83 "static OFFSETS: [u8; {}] = [{}];",
85 fmt_list(&coded_offsets)
// Each coded offset is a single byte.
88 self.bytes_used += coded_offsets.len();
// Emit the generated `lookup` function. The doubled braces in the format
// strings are escapes that produce a single literal brace in the output.
90 writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
91 writeln!(&mut self.file, " super::skip_search(",).unwrap();
92 writeln!(&mut self.file, " c as u32,").unwrap();
93 writeln!(&mut self.file, " &SHORT_OFFSET_RUNS,").unwrap();
94 writeln!(&mut self.file, " &OFFSETS,").unwrap();
95 writeln!(&mut self.file, " )").unwrap();
96 writeln!(&mut self.file, "}}").unwrap();