1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 The CodeMap tracks all the source code used within a single crate, mapping
14 from integer byte positions to the original source code location. Each bit of
15 source parsed during crate parsing (typically files, in-memory strings, or
16 various bits of macro expansion) covers a contiguous range of bytes in the
17 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
18 and used pervasively in the compiler. They are absolute positions within the
19 CodeMap, which upon request can be converted to line and column information,
20 source code snippets, etc.
25 use std::serialize::{Encodable, Decodable, Encoder, Decoder};
/// Builds a position value from a raw `uint`.
28 fn from_uint(n: uint) -> Self;
/// Returns the raw `uint` behind this position.
29 fn to_uint(&self) -> uint;
/// A byte offset, stored as a bare `uint`.
34 pub struct BytePos(uint);
35 /// A character offset. Because of multi-byte UTF-8 characters, a byte offset
36 /// is not equivalent to a character offset. The CodeMap will convert BytePos
37 /// values to CharPos values as necessary.
39 pub struct CharPos(uint);
41 // XXX: Lots of boilerplate in these impls, but so far my attempts to fix
42 // have been unsuccessful
// `Pos` for byte offsets: wrap and unwrap the inner `uint`.
44 impl Pos for BytePos {
45 fn from_uint(n: uint) -> BytePos { BytePos(n) }
// `**self` yields the wrapped `uint`.
46 fn to_uint(&self) -> uint { **self }
// Byte offsets are ordered by comparing the wrapped `uint` values.
49 impl cmp::Ord for BytePos {
50 fn lt(&self, other: &BytePos) -> bool { **self < **other }
51 fn le(&self, other: &BytePos) -> bool { **self <= **other }
52 fn ge(&self, other: &BytePos) -> bool { **self >= **other }
53 fn gt(&self, other: &BytePos) -> bool { **self > **other }
// Adding two byte offsets sums the wrapped values and re-wraps the result.
56 impl Add<BytePos, BytePos> for BytePos {
57 fn add(&self, rhs: &BytePos) -> BytePos {
58 BytePos(**self + **rhs)
// Subtracting byte offsets subtracts the wrapped values and re-wraps the result.
// Nothing here checks that `rhs` is not larger than `self`.
62 impl Sub<BytePos, BytePos> for BytePos {
63 fn sub(&self, rhs: &BytePos) -> BytePos {
64 BytePos(**self - **rhs)
// Byte iteration is forwarded to the wrapped `uint`.
68 impl to_bytes::IterBytes for BytePos {
69 fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) {
70 (**self).iter_bytes(lsb0, f)
// `Pos` for character offsets: wrap and unwrap the inner `uint`.
74 impl Pos for CharPos {
75 fn from_uint(n: uint) -> CharPos { CharPos(n) }
// `**self` yields the wrapped `uint`.
76 fn to_uint(&self) -> uint { **self }
// Character offsets are ordered by comparing the wrapped `uint` values.
79 impl cmp::Ord for CharPos {
80 fn lt(&self, other: &CharPos) -> bool { **self < **other }
81 fn le(&self, other: &CharPos) -> bool { **self <= **other }
82 fn ge(&self, other: &CharPos) -> bool { **self >= **other }
83 fn gt(&self, other: &CharPos) -> bool { **self > **other }
// Byte iteration is forwarded to the wrapped `uint`.
86 impl to_bytes::IterBytes for CharPos {
87 fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) {
88 (**self).iter_bytes(lsb0, f)
// Adding two character offsets sums the wrapped values and re-wraps the result.
92 impl Add<CharPos,CharPos> for CharPos {
93 fn add(&self, rhs: &CharPos) -> CharPos {
94 CharPos(**self + **rhs)
// Subtracting character offsets subtracts the wrapped values and re-wraps the result.
// Nothing here checks that `rhs` is not larger than `self`.
98 impl Sub<CharPos,CharPos> for CharPos {
99 fn sub(&self, rhs: &CharPos) -> CharPos {
100 CharPos(**self - **rhs)
105 Spans represent a region of code, used for error reporting. Positions in spans
106 are *absolute* positions from the beginning of the codemap, not positions
107 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
108 to the original source.
// Optional expansion info carried by the span; `mk_sp` fills this with `None`.
113 expn_info: Option<@ExpnInfo>
/// A value of type `T` (`node`) stored together with a `span`.
119 pub struct spanned<T> { node: T, span: span }
// Span equality looks only at `lo` and `hi`; `expn_info` is not compared.
121 impl cmp::Eq for span {
122 fn eq(&self, other: &span) -> bool {
123 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
// `ne` is the negation of `eq`.
125 fn ne(&self, other: &span) -> bool { !(*self).eq(other) }
// Encoding a span emits a single nil value; none of the span's fields are written.
128 impl<S:Encoder> Encodable<S> for span {
129 /* Note #1972 -- spans are encoded but not decoded */
130 fn encode(&self, _s: &S) { _s.emit_nil() }
// Decoding counterpart of the `Encodable` impl for `span`.
// NOTE(review): `_d` is underscore-prefixed, so the decoder is presumably
// unused and a fixed span is returned -- confirm against the body.
133 impl<D:Decoder> Decodable<D> for span {
134 fn decode(_d: &D) -> span {
/// Wraps `t` in a `spanned` whose span runs from `lo` to `hi` (built with `mk_sp`).
139 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> spanned<T> {
140 respan(mk_sp(lo, hi), t)
/// Wraps `t` in a `spanned` carrying the given span `sp`.
143 pub fn respan<T>(sp: span, t: T) -> spanned<T> {
144 spanned {node: t, span: sp}
/// Wraps `t` in a `spanned` carrying the placeholder span from `dummy_sp()`.
147 pub fn dummy_spanned<T>(t: T) -> spanned<T> {
148 respan(dummy_sp(), t)
/// Builds a span from `lo` to `hi` with `expn_info` set to `None`.
151 /* assuming that we're not in macro expansion */
152 pub fn mk_sp(lo: BytePos, hi: BytePos) -> span {
153 span {lo: lo, hi: hi, expn_info: None}
/// Returns the placeholder span: `lo` and `hi` are both `BytePos(0)`.
156 // make this a const, once the compiler supports it
157 pub fn dummy_sp() -> span { return mk_sp(BytePos(0), BytePos(0)); }
161 /// A source code location used for error reporting
163 /// Information about the original source
165 /// The (1-based) line number
167 /// The (0-based) column offset
171 /// A source code location used as the result of lookup_char_pos_adj
172 // Actually, *none* of the clients use the filename *or* file field;
173 // perhaps they should just be removed.
174 pub struct LocWithOpt {
// `lookup_char_pos_adj` fills this with `Some(loc.file)`.
178 file: Option<@FileMap>,
181 // used to be structural records. Better names, anyone?
// Returned by `lookup_line`: `line` is a 0-based index into `fm.lines`
// (`lookup_pos` adds 1 to get the reported line number).
182 pub struct FileMapAndLine {fm: @FileMap, line: uint}
// Returned by `lookup_byte_offset`: `pos` is the looked-up position minus `fm.start_pos`.
183 pub struct FileMapAndBytePos {fm: @FileMap, pos: BytePos}
// A name together with an optional span.
184 pub struct NameAndSpan {name: ~str, span: Option<span>}
/// Data carried by the `ExpandedFrom` variant of the expansion info.
186 pub struct CallInfo {
191 /// Extra information for tracking macro expansion of spans
// This variant wraps a `CallInfo`.
193 ExpandedFrom(CallInfo)
/// The name of a source, as an owned string.
196 pub type FileName = ~str;
204 // represents the origin of a file:
205 pub enum FileSubstr {
206 // indicates that this is a normal standalone file:
208 // indicates that this "file" is actually a substring
209 // of another file that appears earlier in the codemap
// (presumably the span locates the substring in that earlier file:
// `adjust_span` and `lookup_char_pos_adj` translate positions by a span's `lo`)
210 pub FssInternal(span),
213 /// Identifies an offset of a multi-byte character in a FileMap
214 pub struct MultiByteChar {
215 /// The absolute offset of the character in the CodeMap
217 /// The number of bytes, >=2 (`record_multibyte_char` asserts 2 <= bytes <= 4)
221 /// A single source in the CodeMap
223 /// The name of the file that the source came from, source that doesn't
224 /// originate from files has names between angle brackets by convention,
227 /// Extra information used by qquote
229 /// The complete source code
231 /// The start position of this source in the CodeMap
233 /// Locations of line beginnings in the source code (appended to by `next_line`)
234 lines: @mut ~[BytePos],
235 /// Locations of multi-byte characters in the source code (appended to by
/// `record_multibyte_char`)
236 multibyte_chars: @mut ~[MultiByteChar],
240 // EFFECT: register a start-of-line offset in the
241 // table of line-beginnings.
242 // UNCHECKED INVARIANT: these offsets must be added in the right
243 // order and must be in the right places; there is shared knowledge
244 // about what ends a line between this file and parse.rs
// Fails (via the assert) when `pos` is not strictly greater than the last
// registered line start.
245 fn next_line(&self, pos: BytePos) {
246 // the new byte pos must be > the last one (or it's the first one).
247 let lines = &mut *self.lines;
248 assert!((lines.len() == 0) || (lines[lines.len() - 1] < pos));
// Push through the borrow taken above instead of borrowing `self.lines`
// mutably a second time while `lines` is still live.
249 lines.push(pos);
252 // get a line from the list of pre-computed line-beginnings
// `line` is a 0-based index into `self.lines`. The returned text does not
// include the terminating newline.
253 pub fn get_line(&self, line: int) -> ~str {
// Registered line starts include the file's `start_pos`; subtracting it
// gives an index into `src`.
254 let begin: BytePos = self.lines[line] - self.start_pos;
255 let begin = begin.to_uint();
// The line ends at the first '\n' found from `begin`, or at the end of
// `src` when there is none.
256 let end = match str::find_char_from(*self.src, '\n', begin) {
258 None => str::len(*self.src)
260 str::slice(*self.src, begin, end).to_owned()
/// Appends an entry for a multi-byte character to this FileMap's
/// `multibyte_chars` table. `bytes` is the character's byte length.
263 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
// Only 2- to 4-byte characters are accepted.
264 assert!(bytes >=2 && bytes <= 4);
265 let mbc = MultiByteChar {
269 self.multibyte_chars.push(mbc);
// Every FileMap added so far; `new_filemap_w_substr` appends to this vector.
274 files: @mut ~[@FileMap]
/// Creates a `CodeMap`.
278 pub fn new() -> CodeMap {
284 /// Add a new FileMap to the CodeMap and return it
// Delegates to `new_filemap_w_substr`, passing `FssNone` as the substr.
285 fn new_filemap(&self, filename: FileName, src: @~str) -> @FileMap {
286 return self.new_filemap_w_substr(filename, FssNone, src);
/// Builds a FileMap for `src` with the given name and substr, gives it a
/// `start_pos` in the CodeMap, and appends it to `self.files`.
289 fn new_filemap_w_substr(
295 let files = &mut *self.files;
// When files already exist, the new one starts where the last one ends:
// the last file's start plus the length of its source.
296 let start_pos = if files.len() == 0 {
299 let last_start = files.last().start_pos.to_uint();
300 let last_len = files.last().src.len();
301 last_start + last_len
304 let filemap = @FileMap {
305 name: filename, substr: substr, src: src,
306 start_pos: BytePos(start_pos),
308 multibyte_chars: @mut ~[],
// Append through the `files` borrow taken above instead of borrowing
// `self.files` mutably a second time while `files` is still live.
311 files.push(filemap);
/// Formats the location of `sp.lo` as `<file:line:col>`, the name used for a
/// substring "file".
316 pub fn mk_substr_filename(&self, sp: span) -> ~str {
317 let pos = self.lookup_char_pos(sp.lo);
318 return fmt!("<%s:%u:%u>", pos.file.name,
319 pos.line, pos.col.to_uint());
322 /// Lookup source information about a BytePos
// Thin public wrapper over `lookup_pos`.
323 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
324 return self.lookup_pos(pos);
/// Looks up `pos`, then dispatches on the containing file's `substr`: one arm
/// builds a `LocWithOpt` from the lookup result, the other recurses on the
/// position translated into the span `sp`.
327 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt
329 let loc = self.lookup_char_pos(pos);
330 match (loc.file.substr) {
333 filename: /* FIXME (#2543) */ copy loc.file.name,
336 file: Some(loc.file)},
// Recursive case: offset of `pos` within its file, re-based at `sp.lo`.
338 self.lookup_char_pos_adj(
339 sp.lo + (pos - loc.file.start_pos)),
/// Dispatches on the `substr` of the file containing `sp.lo`. The recursive
/// arm rebuilds the span with both ends re-based at `s.lo` and `expn_info`
/// carried over unchanged.
343 pub fn adjust_span(&self, sp: span) -> span {
344 let line = self.lookup_line(sp.lo);
345 match (line.fm.substr) {
348 self.adjust_span(span {
// Offset within the current file, translated to start at `s.lo`.
349 lo: s.lo + (sp.lo - line.fm.start_pos),
350 hi: s.lo + (sp.hi - line.fm.start_pos),
351 expn_info: sp.expn_info
/// Renders `sp` as `file:line:col: line:col`, using the adjusted lookups for
/// both ends. Returns "no-location" for the dummy span when no files exist.
357 pub fn span_to_str(&self, sp: span) -> ~str {
// Read-only borrow (as in `lookup_filemap_idx`): only `len()` is needed, and
// the lookups below read `self.files` again.
358 let files = &*self.files;
359 if files.len() == 0 && sp == dummy_sp() {
360 return ~"no-location";
363 let lo = self.lookup_char_pos_adj(sp.lo);
364 let hi = self.lookup_char_pos_adj(sp.hi);
365 return fmt!("%s:%u:%u: %u:%u", lo.filename,
366 lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
/// Returns a copy of the name of the file containing `sp.lo`.
369 pub fn span_to_filename(&self, sp: span) -> FileName {
370 let lo = self.lookup_char_pos(sp.lo);
371 return /* FIXME (#2543) */ copy lo.file.name;
/// Builds a `FileLines` for `sp`, attributed to the file containing `sp.lo`.
374 pub fn span_to_lines(&self, sp: span) -> @FileLines {
375 let lo = self.lookup_char_pos(sp.lo);
376 let hi = self.lookup_char_pos(sp.hi);
// `Loc.line` is 1-based, so the range runs over 0-based line indices from
// the line of `lo` through the line of `hi` (the end bound is exclusive).
378 for uint::range(lo.line - 1u, hi.line as uint) |i| {
381 return @FileLines {file: lo.file, lines: lines};
/// Returns the source text between `sp.lo` and `sp.hi` as an owned string.
384 pub fn span_to_snippet(&self, sp: span) -> ~str {
385 let begin = self.lookup_byte_offset(sp.lo);
386 let end = self.lookup_byte_offset(sp.hi);
// Both ends must resolve to a FileMap with the same start position.
387 assert!(begin.fm.start_pos == end.fm.start_pos);
388 return str::slice(*begin.fm.src,
389 begin.pos.to_uint(), end.pos.to_uint()).to_owned();
/// Returns the first FileMap whose name equals `filename`; fails (without a
/// message) when there is none.
392 pub fn get_filemap(&self, filename: ~str) -> @FileMap {
393 for self.files.each |fm| { if fm.name == filename { return *fm; } }
394 //XXjdm the following triggers a mismatched type bug
395 // (or expected function, found _|_)
396 fail!(); // ("asking for " + filename + " which we don't know about");
/// Returns an index into `self.files` for the position `pos`; fails when the
/// position does not resolve to a source location.
403 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
404 let files = &*self.files;
405 let len = files.len();
// Midpoint probe: compare that file's start against `pos` to pick a half.
409 let m = (a + b) / 2u;
410 if self.files[m].start_pos > pos {
417 fail!(fmt!("position %u does not resolve to a source location",
/// Finds the FileMap for `pos` and the 0-based index of the line within it.
424 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine
426 let idx = self.lookup_filemap_idx(pos);
427 let f = self.files[idx];
429 let lines = &*f.lines;
430 let mut b = lines.len();
// Bisect the line-start table: `b` moves down past starts greater than
// `pos`, otherwise `a` moves up to `m`.
432 let m = (a + b) / 2u;
433 if lines[m] > pos { b = m; } else { a = m; }
435 return FileMapAndLine {fm: f, line: a};
/// Resolves `pos` to a `Loc`: file, 1-based line, and a column measured in
/// characters from the start of the line.
438 fn lookup_pos(&self, pos: BytePos) -> Loc {
439 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
440 let line = a + 1u; // Line numbers start at 1
// Convert both the position and its line start to character positions; the
// column is their difference.
441 let chpos = self.bytepos_to_local_charpos(pos);
442 let linebpos = f.lines[a];
443 let linechpos = self.bytepos_to_local_charpos(linebpos);
444 debug!("codemap: byte pos %? is on the line at byte pos %?",
446 debug!("codemap: char pos %? is on the line at char pos %?",
448 debug!("codemap: byte is on line: %?", line);
449 assert!(chpos >= linechpos);
453 col: chpos - linechpos
/// Renders `sp` in the same `file:line:col: line:col` format as `span_to_str`,
/// but with the unadjusted `lookup_char_pos` and the FileMap's own name.
457 fn span_to_str_no_adj(&self, sp: span) -> ~str {
458 let lo = self.lookup_char_pos(sp.lo);
459 let hi = self.lookup_char_pos(sp.hi);
460 return fmt!("%s:%u:%u: %u:%u", lo.file.name,
461 lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
/// Returns the FileMap for `bpos` together with `bpos` expressed as an offset
/// from that file's `start_pos`.
464 fn lookup_byte_offset(&self, bpos: BytePos)
465 -> FileMapAndBytePos {
466 let idx = self.lookup_filemap_idx(bpos);
467 let fm = self.files[idx];
468 let offset = bpos - fm.start_pos;
469 return FileMapAndBytePos {fm: fm, pos: offset};
472 // Converts an absolute BytePos to a CharPos relative to the file it is
474 fn bytepos_to_local_charpos(&self, bpos: BytePos) -> CharPos {
475 debug!("codemap: converting %? to char pos", bpos);
476 let idx = self.lookup_filemap_idx(bpos);
477 let map = self.files[idx];
479 // The number of extra bytes due to multibyte chars in the FileMap
480 let mut total_extra_bytes = 0;
482 for map.multibyte_chars.each |mbc| {
483 debug!("codemap: %?-byte char at %?", mbc.bytes, mbc.pos);
// An n-byte character is a single char, so it contributes n - 1 extra bytes.
// `record_multibyte_char` asserts n >= 2, so the subtraction cannot underflow.
485 total_extra_bytes += mbc.bytes - 1;
486 // We should never see a byte position in the middle of a
488 assert!(bpos == mbc.pos
489 || bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
// NOTE(review): `bpos` is absolute and `map.start_pos` is not subtracted
// here, so the result is not file-relative as the header comment says.
// `lookup_pos` only uses differences of two results -- confirm before
// relying on the absolute value.
495 CharPos(bpos.to_uint() - total_extra_bytes)
// Line starts are registered by hand rather than by a parser.
505 let cm = CodeMap::new();
506 let fm = cm.new_filemap(~"blork.rs",@~"first line.\nsecond line");
507 fm.next_line(BytePos(0));
508 assert_eq!(&fm.get_line(0),&~"first line.");
509 // TESTING BROKEN BEHAVIOR:
// Byte 10 is the '.' inside the first line, not a real line start, so
// "line 1" comes back as just ".".
510 fm.next_line(BytePos(10));
511 assert_eq!(&fm.get_line(1),&~".");
517 let cm = CodeMap::new();
518 let fm = cm.new_filemap(~"blork.rs",@~"first line.\nsecond line");
519 // TESTING *REALLY* BROKEN BEHAVIOR:
// Registers 0, 10, then 2: the last call breaks the strictly-increasing
// order that `next_line` asserts.
520 fm.next_line(BytePos(0));
521 fm.next_line(BytePos(10));
522 fm.next_line(BytePos(2));