1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 The CodeMap tracks all the source code used within a single crate, mapping
14 from integer byte positions to the original source code location. Each bit of
15 source parsed during crate parsing (typically files, in-memory strings, or
16 various bits of macro expansion) covers a contiguous range of bytes in the
17 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
18 and used pervasively in the compiler. They are absolute positions within the
19 CodeMap, which upon request can be converted to line and column information,
20 source code snippets, etc.
24 use serialize::{Encodable, Decodable, Encoder, Decoder};
25 use std::cell::RefCell;
27 use std::strbuf::StrBuf;
/// Builds a position of the implementing type from a plain `uint`.
30 fn from_uint(n: uint) -> Self;
/// Returns this position as a plain `uint`.
31 fn to_uint(&self) -> uint;
34 /// A byte offset. Keep this small (currently 32 bits), as the AST contains these.
// The wrapped `u32` is public, so a value is built directly as `BytePos(n)`.
36 #[deriving(Clone, Eq, TotalEq, Hash, Ord, Show)]
37 pub struct BytePos(pub u32);
39 /// A character offset. Because of multi-byte UTF-8 characters, a byte offset
40 /// is not equivalent to a character offset. The CodeMap will convert BytePos
41 /// values to CharPos values as necessary.
// The wrapped value is a public, full-width `uint`.
42 #[deriving(Eq, Hash, Ord, Show)]
43 pub struct CharPos(pub uint);
45 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
46 // have been unsuccessful
// `Pos` conversions between `BytePos` and `uint`.
48 impl Pos for BytePos {
// Narrows `n` with an unchecked `as u32` cast: a value that does not fit in
// 32 bits is not reported as an error.
49 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
// Destructures the tuple struct and widens the stored `u32` to `uint`.
50 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
// `BytePos + BytePos`, yielding a `BytePos`.
53 impl Add<BytePos, BytePos> for BytePos {
54 fn add(&self, rhs: &BytePos) -> BytePos {
// The sum is formed in `uint` and then narrowed back with an unchecked `as u32`.
55 BytePos((self.to_uint() + rhs.to_uint()) as u32)
// `BytePos - BytePos`, yielding a `BytePos`.
59 impl Sub<BytePos, BytePos> for BytePos {
60 fn sub(&self, rhs: &BytePos) -> BytePos {
// The difference is formed in `uint` and narrowed with `as u32`.
// NOTE(review): nothing here checks that `rhs <= self` -- confirm callers guarantee it.
61 BytePos((self.to_uint() - rhs.to_uint()) as u32)
// `Pos` conversions for `CharPos`; both directions are lossless because the
// wrapped value is already a `uint`.
65 impl Pos for CharPos {
66 fn from_uint(n: uint) -> CharPos { CharPos(n) }
67 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
// `CharPos + CharPos`, yielding a `CharPos`.
70 impl Add<CharPos,CharPos> for CharPos {
71 fn add(&self, rhs: &CharPos) -> CharPos {
// Plain `uint` addition of the two wrapped values.
72 CharPos(self.to_uint() + rhs.to_uint())
// `CharPos - CharPos`, yielding a `CharPos`.
76 impl Sub<CharPos,CharPos> for CharPos {
77 fn sub(&self, rhs: &CharPos) -> CharPos {
// Plain `uint` subtraction.
// NOTE(review): nothing here checks that `rhs <= self` -- confirm callers guarantee it.
78 CharPos(self.to_uint() - rhs.to_uint())
83 Spans represent a region of code, used for error reporting. Positions in spans
84 are *absolute* positions from the beginning of the codemap, not positions
85 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
86 to the original source.
88 #[deriving(Clone, Show, Hash)]
92 /// Information about where the macro came from, if this piece of
93 /// code was created by a macro expansion.
// `None` for code that did not come from a macro expansion.
94 pub expn_info: Option<@ExpnInfo>
/// A placeholder span: `lo` and `hi` are both byte 0 and there is no
/// expansion info.
97 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None };
/// A value of type `T` (the `node`) paired with a `Span` (the `span`).
99 #[deriving(Clone, Eq, TotalEq, Encodable, Decodable, Hash)]
100 pub struct Spanned<T> {
// Two spans compare equal when their `lo` and `hi` positions match;
// `expn_info` takes no part in the comparison.
106 fn eq(&self, other: &Span) -> bool {
107 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
// `ne` is simply the negation of `eq`.
109 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
112 impl TotalEq for Span {}
114 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
115 /* Note #1972 -- spans are encoded but not decoded */
116 fn encode(&self, s: &mut S) -> Result<(), E> {
// Decoding side of the `Span` serialization.
121 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
// The decoder argument is named `_d`, i.e. it is not used.
// NOTE(review): presumably a placeholder span is returned -- confirm against the body.
122 fn decode(_d: &mut D) -> Result<Span, E> {
/// Wraps `t` in a `Spanned` covering `lo..hi`. The span is built with
/// `mk_sp`, so it carries no macro-expansion info.
127 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
128 respan(mk_sp(lo, hi), t)
/// Pairs the node `t` with the already-built span `sp`.
131 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
132 Spanned {node: t, span: sp}
/// Wraps `t` in a `Spanned` without the caller supplying a span.
// NOTE(review): presumably the span used is `DUMMY_SP` -- confirm against the body.
135 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
/// Builds a `Span` from `lo` and `hi` with `expn_info` set to `None`.
139 /* assuming that we're not in macro expansion */
140 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
141 Span {lo: lo, hi: hi, expn_info: None}
144 /// A source code location used for error reporting.
146 /// Information about the original source.
// Shared handle (`Rc`) to the `FileMap` that contains the location.
147 pub file: Rc<FileMap>,
148 /// The (1-based) line number.
150 /// The (0-based) column offset.
154 /// A source code location used as the result of lookup_char_pos_adj
155 // Actually, *none* of the clients use the filename *or* file field;
156 // perhaps they should just be removed.
157 pub struct LocWithOpt {
// An owned copy of the name of the file the location falls in.
158 pub filename: FileName,
// Optional shared handle to the containing `FileMap`.
161 pub file: Option<Rc<FileMap>>,
164 // used to be structural records. Better names, anyone?
// Result of `lookup_line`: a filemap plus a 0-based index into that filemap's
// `lines` table (callers add 1 to get the human-facing line number).
165 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
// Result of `lookup_byte_offset`: a filemap plus a byte offset measured from
// that filemap's `start_pos` (i.e. relative to the filemap, not the CodeMap).
166 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
168 /// The syntax with which a macro was invoked.
169 #[deriving(Clone, Hash, Show)]
170 pub enum MacroFormat {
171 /// e.g. #[deriving(...)] <item>
/// Describes the macro behind an expansion: how it was invoked and, when
/// available, where it was defined.
177 #[deriving(Clone, Hash, Show)]
178 pub struct NameAndSpan {
179 /// The name of the macro that was invoked to create the thing
182 /// The format with which the macro was invoked.
183 pub format: MacroFormat,
184 /// The span of the macro definition itself. The macro may not
185 /// have a sensible definition span (e.g. something defined
186 /// completely inside libsyntax) in which case this is None.
187 pub span: Option<Span>
190 /// Extra information for tracking macro expansion of spans
191 #[deriving(Hash, Show)]
192 pub struct ExpnInfo {
193 /// The location of the actual macro invocation, e.g. `let x =
/// foo!();`
196 /// This may recursively refer to other macro invocations, e.g. if
197 /// `foo!()` invoked `bar!()` internally, and there was an
198 /// expression inside `bar!`; the call_site of the expression in
199 /// the expansion would point to the `bar!` invocation; that
200 /// call_site span would have its own ExpnInfo, with the call_site
201 /// pointing to the `foo!` invocation.
203 /// Information about the macro and its definition.
205 /// The `callee` of the inner expression in the `call_site`
206 /// example would point to the `macro_rules! bar { ... }` and that
207 /// of the `bar!()` invocation would point to the `macro_rules!
/// foo { ... }`.
209 pub callee: NameAndSpan
/// The name of a source, held as an owned string.
212 pub type FileName = ~str;
/// Result of `span_to_lines`: the filemap a span starts in, together with
/// a `lines` list for the span.
// NOTE(review): `lines` appears to hold 0-based line indices (the loop in
// `span_to_lines` starts at `lo.line - 1`) -- confirm.
214 pub struct FileLines {
215 pub file: Rc<FileMap>,
219 /// Identifies an offset of a multi-byte character in a FileMap
220 pub struct MultiByteChar {
221 /// The absolute offset of the character in the CodeMap
223 /// The number of bytes, >=2
// (`record_multibyte_char` additionally asserts an upper bound of 4.)
227 /// A single source in the CodeMap
229 /// The name of the file that the source came from. Sources that do not
230 /// originate from files have names between angle brackets by convention.
233 /// The complete source code
235 /// The start position of this source in the CodeMap
236 pub start_pos: BytePos,
237 /// Locations of line beginnings in the source code, as absolute CodeMap positions
// Wrapped in a `RefCell` so entries can be added through a shared `&self`.
238 pub lines: RefCell<Vec<BytePos> >,
239 /// Locations of multi-byte characters in the source code
// Also a `RefCell`, filled in by `record_multibyte_char`.
240 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
244 // EFFECT: register a start-of-line offset in the
245 // table of line-beginnings.
246 // UNCHECKED INVARIANT: these offsets must be added in the right
247 // order and must be in the right places; there is shared knowledge
248 // about what ends a line between this file and parse.rs
249 // WARNING: pos param here is the offset relative to start of CodeMap,
250 // and CodeMap will append a newline when adding a filemap without a newline at the end,
251 // so the safe way to call this is with value calculated as
252 // filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
253 pub fn next_line(&self, pos: BytePos) {
254 // `pos` must be strictly greater than the last recorded line start (or be
// the very first entry); otherwise the assertion below fails.
// The doubled `;` on the next line is only an empty statement.
255 let mut lines = self.lines.borrow_mut();;
256 let line_len = lines.len();
257 assert!(line_len == 0 || (*lines.get(line_len - 1) < pos))
261 // get a line from the list of pre-computed line-beginnings
// `line` is a 0-based index into `self.lines`. The result is an owned copy of
// the text from that line's start up to, but not including, the next '\n'
// (or to the end of the source when no '\n' follows).
262 pub fn get_line(&self, line: int) -> ~str {
// The table is only read here, although the borrow is taken mutably.
263 let mut lines = self.lines.borrow_mut();
// Line starts are absolute CodeMap positions; subtracting `start_pos` turns
// the entry into an offset into `self.src`.
264 let begin: BytePos = *lines.get(line as uint) - self.start_pos;
265 let begin = begin.to_uint();
266 let slice = self.src.slice_from(begin);
267 match slice.find('\n') {
268 Some(e) => slice.slice_to(e).to_owned(),
269 None => slice.to_owned()
// Registers a multi-byte character located at `pos` that occupies `bytes`
// bytes by appending an entry to `multibyte_chars`.
273 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
// Only lengths of 2, 3 or 4 bytes are accepted.
274 assert!(bytes >=2 && bytes <= 4);
275 let mbc = MultiByteChar {
279 self.multibyte_chars.borrow_mut().push(mbc);
// Returns false only when the name both starts with "<" and ends with ">"
// (the angle-bracket naming used for sources that are not files).
282 pub fn is_real_file(&self) -> bool {
283 !(self.name.starts_with("<") && self.name.ends_with(">"))
// Every filemap registered so far, in registration order (`new_filemap`
// pushes onto the end). The `RefCell` allows pushing through `&self`.
288 pub files: RefCell<Vec<Rc<FileMap>>>
// Creates a CodeMap with an empty list of filemaps.
292 pub fn new() -> CodeMap {
294 files: RefCell::new(Vec::new()),
// Registers `src` under `filename` as a new FileMap placed after all existing
// ones, and returns a shared handle to it. The line table and the multi-byte
// character table start out empty.
298 pub fn new_filemap(&self, filename: FileName, src: ~str) -> Rc<FileMap> {
299 let mut files = self.files.borrow_mut();
// The new filemap begins exactly where the previously added one ends.
300 let start_pos = match files.last() {
302 Some(last) => last.start_pos.to_uint() + last.src.len(),
305 // Remove utf-8 BOM if any.
306 // FIXME #12884: no efficient/safe way to remove from the start of a string
307 // and reuse the allocation.
308 let mut src = if src.starts_with("\ufeff") {
// U+FEFF is three bytes long in UTF-8, hence the slice from offset 3.
309 StrBuf::from_str(src.as_slice().slice_from(3))
311 StrBuf::from_owned_str(src)
314 // Append '\n' in case it's not already there.
315 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
316 // overflowing into the next filemap in case the last byte of span is also the last
317 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
// Empty sources are left empty.
318 if src.len() > 0 && !src.as_slice().ends_with("\n") {
322 let filemap = Rc::new(FileMap {
324 src: src.into_owned(),
325 start_pos: Pos::from_uint(start_pos),
326 lines: RefCell::new(Vec::new()),
327 multibyte_chars: RefCell::new(Vec::new()),
// The CodeMap keeps its own handle to the filemap.
330 files.push(filemap.clone());
// Builds a name of the form "<file:line:col>" from the start of `sp`.
335 pub fn mk_substr_filename(&self, sp: Span) -> ~str {
336 let pos = self.lookup_char_pos(sp.lo);
// The stored column is 0-based; 1 is added for display.
337 format!("<{}:{}:{}>", pos.file.name, pos.line, pos.col.to_uint() + 1)
340 /// Lookup source information about a BytePos
// Returns a `Loc` (file, line, column) for the absolute position `pos`.
341 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
// Like `lookup_char_pos`, but packages the result as a `LocWithOpt`.
345 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
346 let loc = self.lookup_char_pos(pos);
// The file name is copied into an owned string.
348 filename: loc.file.name.to_str(),
// Renders `sp` as "file:line:col: line:col", using the file name of the
// span's start and 1-based columns.
355 pub fn span_to_str(&self, sp: Span) -> ~str {
// With no filemaps registered there is nothing to look up for a dummy span.
356 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
357 return ~"no-location";
360 let lo = self.lookup_char_pos_adj(sp.lo);
361 let hi = self.lookup_char_pos_adj(sp.hi);
// Columns are stored 0-based, so 1 is added to each for display.
362 return format!("{}:{}:{}: {}:{}", lo.filename,
363 lo.line, lo.col.to_uint() + 1, hi.line, hi.col.to_uint() + 1)
// Returns an owned copy of the name of the file containing the span's start.
366 pub fn span_to_filename(&self, sp: Span) -> FileName {
367 self.lookup_char_pos(sp.lo).file.name.to_str()
// Collects the lines touched by `sp` into a `FileLines` whose file is the one
// containing the span's start.
370 pub fn span_to_lines(&self, sp: Span) -> FileLines {
371 let lo = self.lookup_char_pos(sp.lo);
372 let hi = self.lookup_char_pos(sp.hi);
373 let mut lines = Vec::new();
// `Loc::line` is 1-based, so this range runs over 0-based line indices from
// the span's first line through its last line inclusive.
374 for i in range(lo.line - 1u, hi.line as uint) {
377 FileLines {file: lo.file, lines: lines}
// Returns an owned copy of the source text covered by `sp`.
380 pub fn span_to_snippet(&self, sp: Span) -> Option<~str> {
381 let begin = self.lookup_byte_offset(sp.lo);
382 let end = self.lookup_byte_offset(sp.hi);
384 // FIXME #8256: this used to be an assert but whatever precondition
385 // it's testing isn't true for all spans in the AST, so to allow the
386 // caller to not have to fail (and it can't catch it since the CodeMap
387 // isn't sendable), return None
// Differing `start_pos` values mean the two endpoints fall in different filemaps.
388 if begin.fm.start_pos != end.fm.start_pos {
// Both offsets are relative to the same filemap, so they index its `src` directly.
391 Some(begin.fm.src.slice( begin.pos.to_uint(), end.pos.to_uint()).to_owned())
// Scans the registered filemaps in order for one whose name equals
// `filename`. Fails (task failure) when no filemap has that name.
395 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
396 for fm in self.files.borrow().iter() {
397 if filename == fm.name {
401 fail!("asking for {} which we don't know about", filename);
// Returns the index into `self.files` of the filemap that contains `pos`.
// The search bisects on each filemap's `start_pos` and then steps back past
// zero-length filemaps. Fails when `pos` cannot be resolved to any source.
404 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
405 let files = self.files.borrow();
407 let len = files.len();
// Midpoint of the current search interval.
411 let m = (a + b) / 2u;
// A filemap starting after `pos` cannot contain it.
412 if files.get(m).start_pos > pos {
418 // There can be filemaps with length 0. These have the same start_pos as the previous
419 // filemap, but are not the filemaps we want (because they are length 0, they cannot
420 // contain what we are looking for). So, rewind until we find a useful filemap.
422 let lines = files.get(a).lines.borrow();
428 fail!("position {} does not resolve to a source location", pos.to_uint());
433 fail!("position {} does not resolve to a source location", pos.to_uint())
// Finds the filemap containing `pos` and, within it, the line containing
// `pos`. The returned `line` is a 0-based index into that filemap's `lines`.
439 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
440 let idx = self.lookup_filemap_idx(pos);
442 let files = self.files.borrow();
// Clone the `Rc` handle so the filemap can be returned to the caller.
443 let f = files.get(idx).clone();
// The table is only read here, although the borrow is taken mutably.
446 let mut lines = f.lines.borrow_mut();
447 let mut b = lines.len();
// Bisection over the recorded line starts: a line starting after `pos`
// lowers the upper bound, anything else raises the lower bound.
449 let m = (a + b) / 2u;
450 if *lines.get(m) > pos { b = m; } else { a = m; }
453 FileMapAndLine {fm: f, line: a}
// Converts the absolute byte position `pos` into a `Loc`. The column is
// counted in characters from the start of the line, so it is 0-based.
456 fn lookup_pos(&self, pos: BytePos) -> Loc {
457 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
458 let line = a + 1u; // Line numbers start at 1
// Character offset of `pos` within its filemap.
459 let chpos = self.bytepos_to_file_charpos(pos);
// Byte position, then character offset, of the start of the containing line.
460 let linebpos = *f.lines.borrow().get(a);
461 let linechpos = self.bytepos_to_file_charpos(linebpos);
462 debug!("codemap: byte pos {:?} is on the line at byte pos {:?}",
464 debug!("codemap: char pos {:?} is on the line at char pos {:?}",
466 debug!("codemap: byte is on line: {:?}", line);
// A position can never precede the start of the line it is on.
467 assert!(chpos >= linechpos);
471 col: chpos - linechpos
// Resolves the absolute position `bpos` to its filemap plus the byte offset
// of `bpos` relative to that filemap's start.
475 fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
476 let idx = self.lookup_filemap_idx(bpos);
477 let fm = self.files.borrow().get(idx).clone();
478 let offset = bpos - fm.start_pos;
479 FileMapAndBytePos {fm: fm, pos: offset}
482 // Converts an absolute BytePos to a CharPos relative to the filemap.
// The result is the byte offset within the filemap minus the extra bytes
// contributed by the multi-byte characters counted below.
483 fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
484 debug!("codemap: converting {:?} to char pos", bpos);
485 let idx = self.lookup_filemap_idx(bpos);
486 let files = self.files.borrow();
487 let map = files.get(idx);
489 // The number of extra bytes due to multibyte chars in the FileMap
490 let mut total_extra_bytes = 0;
// NOTE(review): presumably only characters located before `bpos` are counted;
// the condition guarding the statements below is not visible here -- confirm.
492 for mbc in map.multibyte_chars.borrow().iter() {
493 debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos);
495 // every character is at least one byte, so we only
496 // count the actual extra bytes.
497 total_extra_bytes += mbc.bytes - 1;
498 // We should never see a byte position in the middle of a
// multi-byte character: `bpos` must lie at or past the character's end.
500 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
// Guards the subtraction below against going negative.
506 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
507 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
// Exercises `next_line` / `get_line` on a single two-line filemap.
517 let cm = CodeMap::new();
518 let fm = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
519 fm.next_line(BytePos(0));
520 assert_eq!(&fm.get_line(0),&~"first line.");
521 // TESTING BROKEN BEHAVIOR:
// Byte 10 is the '.' of "first line.", not a real line start, so "line 1"
// comes back as just "." (the text from byte 10 up to the newline).
522 fm.next_line(BytePos(10));
523 assert_eq!(&fm.get_line(1),&~".");
// Registers line starts out of order.
529 let cm = CodeMap::new();
530 let fm = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
531 // TESTING *REALLY* BROKEN BEHAVIOR:
532 fm.next_line(BytePos(0));
533 fm.next_line(BytePos(10));
// 2 is not greater than the previously registered 10, so the ordering
// assertion inside `next_line` fails on this call.
// NOTE(review): presumably this test is marked as expected-to-fail -- confirm.
534 fm.next_line(BytePos(2));
// Test fixture: two identical two-line sources with an empty source between.
// Each non-empty source is 23 bytes and, per the comment in `new_filemap`,
// gets a trailing '\n' appended, so blork.rs spans bytes 0..24 and both
// empty.rs and blork2.rs start at byte 24.
537 fn init_code_map() -> CodeMap {
538 let cm = CodeMap::new();
539 let fm1 = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
540 let fm2 = cm.new_filemap(~"empty.rs",~"");
541 let fm3 = cm.new_filemap(~"blork2.rs",~"first line.\nsecond line");
// Line starts are absolute CodeMap positions: "second line" begins 12 bytes
// into blork.rs, and at 24 + 12 - 2 = 34 for blork2.rs as registered here.
// NOTE(review): by the same count the second line of blork2.rs starts at
// byte 36, not 34 -- confirm whether 34 is intentional.
543 fm1.next_line(BytePos(0));
544 fm1.next_line(BytePos(12));
545 fm2.next_line(BytePos(24));
546 fm3.next_line(BytePos(24));
547 fm3.next_line(BytePos(34));
554 // Test lookup_byte_offset
555 let cm = init_code_map();
// Byte 22 lies inside blork.rs, which starts at 0, so the offset is unchanged.
557 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
558 assert_eq!(fmabp1.fm.name, ~"blork.rs");
559 assert_eq!(fmabp1.pos, BytePos(22));
// Byte 24 is the shared start of empty.rs and blork2.rs; the zero-length
// empty.rs must be passed over in favour of blork2.rs.
561 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
562 assert_eq!(fmabp2.fm.name, ~"blork2.rs");
563 assert_eq!(fmabp2.pos, BytePos(0));
568 // Test bytepos_to_file_charpos
569 let cm = init_code_map();
// No multi-byte characters are recorded in this fixture, so character
// offsets equal byte offsets relative to the containing filemap.
571 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
572 assert_eq!(cp1, CharPos(22));
574 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
575 assert_eq!(cp2, CharPos(0));
580 // Test zero-length filemaps.
581 let cm = init_code_map();
// Byte 22 is 10 bytes past the second line start (12) of blork.rs.
583 let loc1 = cm.lookup_char_pos(BytePos(22));
584 assert_eq!(loc1.file.name, ~"blork.rs");
585 assert_eq!(loc1.line, 2);
586 assert_eq!(loc1.col, CharPos(10));
// Byte 24 must resolve to blork2.rs rather than the empty empty.rs that
// shares the same start position.
588 let loc2 = cm.lookup_char_pos(BytePos(24));
589 assert_eq!(loc2.file.name, ~"blork2.rs");
590 assert_eq!(loc2.line, 1);
591 assert_eq!(loc2.col, CharPos(0));
// Test fixture with multi-byte characters. blork.rs is 39 bytes plus the
// appended '\n', so blork2.rs starts at byte 40.
594 fn init_code_map_mbc() -> CodeMap {
595 let cm = CodeMap::new();
596 // € is a three byte utf8 char.
597 let fm1 = cm.new_filemap(~"blork.rs",~"fir€st €€€€ line.\nsecond line");
598 let fm2 = cm.new_filemap(~"blork2.rs",~"first line€€.\n€ second line");
// NOTE(review): by byte count the second line of blork.rs begins at 28
// (22 is the start of the word "line.") -- confirm whether 22 is intentional.
600 fm1.next_line(BytePos(0));
601 fm1.next_line(BytePos(22));
602 fm2.next_line(BytePos(40));
603 fm2.next_line(BytePos(58));
// Absolute positions of each '€' in blork.rs ...
605 fm1.record_multibyte_char(BytePos(3), 3);
606 fm1.record_multibyte_char(BytePos(9), 3);
607 fm1.record_multibyte_char(BytePos(12), 3);
608 fm1.record_multibyte_char(BytePos(15), 3);
609 fm1.record_multibyte_char(BytePos(18), 3);
// ... and in blork2.rs (offsets 10, 13 and 18 past its start at 40).
610 fm2.record_multibyte_char(BytePos(50), 3);
611 fm2.record_multibyte_char(BytePos(53), 3);
612 fm2.record_multibyte_char(BytePos(58), 3);
619 // Test bytepos_to_file_charpos in the presence of multi-byte chars
620 let cm = init_code_map_mbc();
// Byte 3 is where the first '€' starts: three one-byte chars precede it.
622 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
623 assert_eq!(cp1, CharPos(3));
// Byte 6 is just past that '€': 6 bytes minus 2 extra bytes = 4 chars.
625 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
626 assert_eq!(cp2, CharPos(4));
// In blork2.rs (start 40): offset 16 minus 2 * 2 extra bytes = 12 chars.
628 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
629 assert_eq!(cp3, CharPos(12));
// Offset 21 minus 3 * 2 extra bytes = 15 chars.
631 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
632 assert_eq!(cp4, CharPos(15));
637 // Test span_to_lines for a span ending at the end of filemap
638 let cm = init_code_map();
// Bytes 12..23 cover exactly "second line" in blork.rs.
639 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
640 let file_lines = cm.span_to_lines(span);
642 assert_eq!(file_lines.file.name, ~"blork.rs");
643 assert_eq!(file_lines.lines.len(), 1);
// The single entry is 1, i.e. the second line as a 0-based index.
644 assert_eq!(*file_lines.lines.get(0), 1u);
649 // Test span_to_snippet for a span ending at the end of filemap
650 let cm = init_code_map();
// Bytes 12..23 cover exactly "second line" in blork.rs; the newline that
// follows it at byte 23 is outside the span.
651 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
652 let snippet = cm.span_to_snippet(span);
654 assert_eq!(snippet, Some(~"second line"));
659 // Test span_to_str for a span ending at the end of filemap
660 let cm = init_code_map();
661 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
662 let sstr = cm.span_to_str(span);
// Both ends are on line 2; the 0-based columns 0 and 11 print as 1 and 12.
664 assert_eq!(sstr, ~"blork.rs:2:1: 2:12");