1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 The CodeMap tracks all the source code used within a single crate, mapping
14 from integer byte positions to the original source code location. Each bit of
15 source parsed during crate parsing (typically files, in-memory strings, or
16 various bits of macro expansion) covers a contiguous range of bytes in the
17 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
18 and used pervasively in the compiler. They are absolute positions within the
19 CodeMap, which upon request can be converted to line and column information,
20 source code snippets, etc.
24 use std::cell::RefCell;
27 use serialize::{Encodable, Decodable, Encoder, Decoder};
30 fn from_uint(n: uint) -> Self;
31 fn to_uint(&self) -> uint;
34 /// A byte offset. Keep this small (currently 32-bits), as AST contains
36 #[deriving(Clone, Eq, Hash, Ord, Show)]
37 pub struct BytePos(u32);
39 /// A character offset. Because of multi-byte UTF-8 characters, a byte offset
40 /// is not equivalent to a character offset. The CodeMap will convert BytePos
41 /// values to CharPos values as necessary.
42 #[deriving(Eq, Hash, Ord, Show)]
43 pub struct CharPos(uint);
45 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
46 // have been unsuccessful
// `Pos` conversions for `BytePos`, which wraps a `u32`.
48 impl Pos for BytePos {
// Wraps `n` after an `as u32` cast; the cast is unchecked, so a value that
// does not fit in 32 bits is silently truncated.
49 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
// Unwraps the inner `u32` and converts it to `uint`.
50 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
// Adds two byte positions. The sum is computed in `uint` and then cast back
// to `u32` without a range check.
53 impl Add<BytePos, BytePos> for BytePos {
54 fn add(&self, rhs: &BytePos) -> BytePos {
55 BytePos((self.to_uint() + rhs.to_uint()) as u32)
// Subtracts one byte position from another. The difference is computed in
// `uint` and cast back to `u32`; nothing here checks that `rhs` is not
// larger than `self`.
59 impl Sub<BytePos, BytePos> for BytePos {
60 fn sub(&self, rhs: &BytePos) -> BytePos {
61 BytePos((self.to_uint() - rhs.to_uint()) as u32)
// `Pos` conversions for `CharPos`, which wraps a `uint` directly, so no cast
// is needed in either direction.
65 impl Pos for CharPos {
66 fn from_uint(n: uint) -> CharPos { CharPos(n) }
67 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
// Adds two character positions by adding their underlying `uint` values.
70 impl Add<CharPos,CharPos> for CharPos {
71 fn add(&self, rhs: &CharPos) -> CharPos {
72 CharPos(self.to_uint() + rhs.to_uint())
// Subtracts one character position from another on the underlying `uint`
// values; nothing here checks that `rhs` is not larger than `self`.
76 impl Sub<CharPos,CharPos> for CharPos {
77 fn sub(&self, rhs: &CharPos) -> CharPos {
78 CharPos(self.to_uint() - rhs.to_uint())
83 Spans represent a region of code, used for error reporting. Positions in spans
84 are *absolute* positions from the beginning of the codemap, not positions
85 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
86 to the original source.
88 #[deriving(Clone, Show, Hash)]
92 expn_info: Option<@ExpnInfo>
/// Placeholder span: `lo` and `hi` are both byte 0 and there is no expansion
/// info.
95 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None };
97 #[deriving(Clone, Eq, Encodable, Decodable, Hash)]
98 pub struct Spanned<T> {
// Span equality looks only at the `lo` and `hi` positions; `expn_info` is
// not compared, so two spans that differ only in expansion info are equal.
103 impl cmp::Eq for Span {
104 fn eq(&self, other: &Span) -> bool {
105 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
// `ne` is defined as the negation of `eq`.
107 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
110 impl<S:Encoder> Encodable<S> for Span {
111 /* Note #1972 -- spans are encoded but not decoded */
112 fn encode(&self, s: &mut S) {
117 impl<D:Decoder> Decodable<D> for Span {
118 fn decode(_d: &mut D) -> Span {
/// Wraps `t` in a `Spanned` whose span runs from `lo` to `hi`; the span is
/// built with `mk_sp`.
123 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
124 respan(mk_sp(lo, hi), t)
/// Pairs the node `t` with the already-built span `sp`.
127 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
128 Spanned {node: t, span: sp}
131 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
135 /* Builds a span from `lo` to `hi` with no expansion info, i.e. assuming that we're not in macro expansion */
136 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
137 Span {lo: lo, hi: hi, expn_info: None}
140 /// A source code location used for error reporting
142 /// Information about the original source
144 /// The (1-based) line number
146 /// The (0-based) column offset
150 /// A source code location used as the result of lookup_char_pos_adj
151 // Actually, *none* of the clients use the filename *or* file field;
152 // perhaps they should just be removed.
153 pub struct LocWithOpt {
157 file: Option<Rc<FileMap>>,
160 // used to be structural records. Better names, anyone?
// A filemap plus a line index into it. As produced by `lookup_line` the
// index is 0-based (`lookup_pos` adds 1 to get the line number).
161 pub struct FileMapAndLine {fm: Rc<FileMap>, line: uint}
// A filemap plus a byte position. As produced by `lookup_byte_offset` the
// position is relative to the filemap's `start_pos`.
162 pub struct FileMapAndBytePos {fm: Rc<FileMap>, pos: BytePos}
164 #[deriving(Clone, Hash, Show)]
165 pub enum MacroFormat {
166 // e.g. #[deriving(...)] <item>
172 #[deriving(Clone, Hash, Show)]
173 pub struct NameAndSpan {
175 // the format with which the macro was invoked.
180 /// Extra information for tracking macro expansion of spans
181 #[deriving(Hash, Show)]
182 pub struct ExpnInfo {
187 pub type FileName = ~str;
189 pub struct FileLines {
194 /// Identifies an offset of a multi-byte character in a FileMap
195 pub struct MultiByteChar {
196 /// The absolute offset of the character in the CodeMap
198 /// The number of bytes, >=2
202 /// A single source in the CodeMap
204 /// The name of the file that the source came from, source that doesn't
205 /// originate from files has names between angle brackets by convention,
208 /// The complete source code
210 /// The start position of this source in the CodeMap
212 /// Locations of line beginnings in the source code
213 lines: RefCell<Vec<BytePos> >,
214 /// Locations of multi-byte characters in the source code
215 multibyte_chars: RefCell<Vec<MultiByteChar> >,
219 // EFFECT: register a start-of-line offset in the
220 // table of line-beginnings.
221 // UNCHECKED INVARIANT: these offsets must be added in the right
222 // order and must be in the right places; there is shared knowledge
223 // about what ends a line between this file and parse.rs
// (Only the ordering is asserted below; whether `pos` really is a line start
// is not checked.)
224 // WARNING: pos param here is the offset relative to start of CodeMap,
225 // and CodeMap will append a newline when adding a filemap without a newline at the end,
226 // so the safe way to call this is with value calculated as
227 // filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
228 pub fn next_line(&self, pos: BytePos) {
229 // the new byte position must be > the last one (or it's the first one).
// NOTE(review): the doubled `;;` on the next line looks like a stray empty
// statement.
230 let mut lines = self.lines.borrow_mut();;
231 let line_len = lines.get().len();
232 assert!(line_len == 0 || (*lines.get().get(line_len - 1) < pos))
233 lines.get().push(pos);
236 // get a line from the list of pre-computed line-beginnings
// `line` is used directly as an index into the line-beginnings table. The
// result runs from that line's start up to, but not including, the next
// '\n', or to the end of the source when there is no further newline.
237 pub fn get_line(&self, line: int) -> ~str {
// NOTE(review): `lines` is only read in this function, so the mutable
// borrow looks unnecessary -- confirm before changing.
238 let mut lines = self.lines.borrow_mut();
// Line starts are stored as absolute CodeMap positions; subtract this
// filemap's start to get an offset into `src`.
239 let begin: BytePos = *lines.get().get(line as uint) - self.start_pos;
240 let begin = begin.to_uint();
241 let slice = self.src.slice_from(begin);
242 match slice.find('\n') {
243 Some(e) => slice.slice_to(e).to_owned(),
244 None => slice.to_owned()
// Records a multi-byte character at `pos` that occupies `bytes` bytes by
// pushing a `MultiByteChar` onto `multibyte_chars`.
248 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
// Only lengths of 2, 3 or 4 bytes are accepted.
249 assert!(bytes >=2 && bytes <= 4);
250 let mbc = MultiByteChar {
254 self.multibyte_chars.borrow_mut().get().push(mbc);
// Returns false when the name both starts with "<" and ends with ">" (the
// angle-bracket convention for sources that don't come from files), and
// true otherwise.
257 pub fn is_real_file(&self) -> bool {
258 !(self.name.starts_with("<") && self.name.ends_with(">"))
263 files: RefCell<Vec<Rc<FileMap>>>
267 pub fn new() -> CodeMap {
269 files: RefCell::new(Vec::new()),
// Creates a FileMap for `src` under the name `filename`, appends it to
// `files`, and hands back an `Rc` to it.
273 pub fn new_filemap(&self, filename: FileName, src: ~str) -> Rc<FileMap> {
274 let mut files = self.files.borrow_mut();
// When a previous filemap exists, the new one begins right after it: the
// previous start position plus the length of the previous source.
275 let start_pos = match files.get().last() {
277 Some(last) => last.deref().start_pos.to_uint() + last.deref().src.len(),
280 // Remove utf-8 BOM if any.
281 // FIXME #12884: no efficient/safe way to remove from the start of a string
282 // and reuse the allocation.
// U+FEFF is three bytes long in UTF-8, hence `slice_from(3)`.
283 let mut src = if src.starts_with("\ufeff") {
284 src.as_slice().slice_from(3).into_owned()
289 // Append '\n' in case it's not already there.
290 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
291 // overflowing into the next filemap in case the last byte of span is also the last
292 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
// An empty source is left empty: the condition requires `src.len() > 0`.
293 if src.len() > 0 && !src.ends_with("\n") {
297 let filemap = Rc::new(FileMap {
300 start_pos: Pos::from_uint(start_pos),
// The line and multi-byte tables start out empty.
301 lines: RefCell::new(Vec::new()),
302 multibyte_chars: RefCell::new(Vec::new()),
305 files.get().push(filemap.clone());
// Builds a pseudo file name of the form `<name:line:col>` for the start of
// `sp`; 1 is added to the looked-up column before printing.
310 pub fn mk_substr_filename(&self, sp: Span) -> ~str {
311 let pos = self.lookup_char_pos(sp.lo);
312 format!("<{}:{}:{}>", pos.file.deref().name, pos.line, pos.col.to_uint() + 1)
315 /// Lookup source information about a BytePos
316 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
320 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
321 let loc = self.lookup_char_pos(pos);
323 filename: loc.file.deref().name.to_str(),
// Renders `sp` as `filename:lo_line:lo_col: hi_line:hi_col`, using the file
// name of the low end and adding 1 to each looked-up column.
330 pub fn span_to_str(&self, sp: Span) -> ~str {
// With no filemaps registered there is nothing to look up, so the dummy
// span gets a fixed string instead.
331 if self.files.borrow().get().len() == 0 && sp == DUMMY_SP {
332 return ~"no-location";
335 let lo = self.lookup_char_pos_adj(sp.lo);
336 let hi = self.lookup_char_pos_adj(sp.hi);
337 return format!("{}:{}:{}: {}:{}", lo.filename,
338 lo.line, lo.col.to_uint() + 1, hi.line, hi.col.to_uint() + 1)
// Returns the name of the file that the low end of `sp` resolves to.
341 pub fn span_to_filename(&self, sp: Span) -> FileName {
342 self.lookup_char_pos(sp.lo).file.deref().name.to_str()
// Resolves both ends of `sp` and returns a `FileLines` holding the file of
// the low end together with the collected `lines`.
345 pub fn span_to_lines(&self, sp: Span) -> FileLines {
346 let lo = self.lookup_char_pos(sp.lo);
347 let hi = self.lookup_char_pos(sp.hi);
348 let mut lines = Vec::new();
// `Loc.line` is 1-based, so this range walks the 0-based indices from the
// line of `lo` through the line of `hi` inclusive.
349 for i in range(lo.line - 1u, hi.line as uint) {
352 FileLines {file: lo.file, lines: lines}
// Returns the source text between `sp.lo` and `sp.hi`, taken from the
// filemap that `sp.lo` resolves to.
355 pub fn span_to_snippet(&self, sp: Span) -> Option<~str> {
356 let begin = self.lookup_byte_offset(sp.lo);
357 let end = self.lookup_byte_offset(sp.hi);
359 // FIXME #8256: this used to be an assert but whatever precondition
360 // it's testing isn't true for all spans in the AST, so to allow the
361 // caller to not have to fail (and it can't catch it since the CodeMap
362 // isn't sendable), return None
// The two ends are treated as being in different filemaps when the
// filemaps' start positions differ.
363 if begin.fm.deref().start_pos != end.fm.deref().start_pos {
// Both offsets are relative to the filemap start, so they index `src`
// directly.
366 Some(begin.fm.deref().src.slice( begin.pos.to_uint(), end.pos.to_uint()).to_owned())
// Scans the registered filemaps in order for one whose name equals
// `filename`; fails if none matches.
370 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
371 for fm in self.files.borrow().get().iter() {
372 if filename == fm.deref().name {
376 fail!("asking for {} which we don't know about", filename);
// Returns the index into `files` of the filemap that `pos` falls in, and
// fails when the position cannot be resolved to a source location.
379 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
380 let files = self.files.borrow();
381 let files = files.get();
382 let len = files.len();
// Bisection step on `start_pos`: `m` is the midpoint of the current
// `a`..`b` search range.
386 let m = (a + b) / 2u;
387 if files.get(m).deref().start_pos > pos {
393 // There can be filemaps with length 0. These have the same start_pos as the previous
394 // filemap, but are not the filemaps we want (because they are length 0, they cannot
395 // contain what we are looking for). So, rewind until we find a useful filemap.
397 let lines = files.get(a).deref().lines.borrow();
398 let lines = lines.get();
403 fail!("position {} does not resolve to a source location", pos.to_uint());
408 fail!("position {} does not resolve to a source location", pos.to_uint())
// Finds the filemap containing `pos` and the index of the line within that
// filemap's line-beginnings table.
414 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
415 let idx = self.lookup_filemap_idx(pos);
417 let files = self.files.borrow();
418 let f = files.get().get(idx).clone();
// NOTE(review): the table is only read in the visible code, so the mutable
// borrow may be unnecessary -- confirm before changing.
421 let mut lines = f.deref().lines.borrow_mut();
422 let mut b = lines.get().len();
// Bisection step: if the midpoint line starts after `pos`, move the upper
// bound `b` down to it; otherwise move the lower bound `a` up to it.
424 let m = (a + b) / 2u;
425 if *lines.get().get(m) > pos { b = m; } else { a = m; }
// `a` is an index into the line table, not a 1-based line number.
428 FileMapAndLine {fm: f, line: a}
// Resolves `pos` to a `Loc`: the line comes from `lookup_line`, and the
// column is the character distance from the start of that line.
431 fn lookup_pos(&self, pos: BytePos) -> Loc {
432 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
433 let line = a + 1u; // Line numbers start at 1
434 let chpos = self.bytepos_to_file_charpos(pos);
// Byte position at which line `a` begins, then the same point expressed as
// a character position.
435 let linebpos = *f.deref().lines.borrow().get().get(a);
436 let linechpos = self.bytepos_to_file_charpos(linebpos);
437 debug!("codemap: byte pos {:?} is on the line at byte pos {:?}",
439 debug!("codemap: char pos {:?} is on the line at char pos {:?}",
441 debug!("codemap: byte is on line: {:?}", line);
// A position can never come before the start of the line it is on.
442 assert!(chpos >= linechpos);
// Both values are character positions relative to the filemap, so their
// difference is the column within the line.
446 col: chpos - linechpos
// Converts the absolute position `bpos` into the filemap containing it plus
// a byte offset relative to that filemap's `start_pos`.
450 fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
451 let idx = self.lookup_filemap_idx(bpos);
452 let fm = self.files.borrow().get().get(idx).clone();
453 let offset = bpos - fm.deref().start_pos;
454 FileMapAndBytePos {fm: fm, pos: offset}
457 // Converts an absolute BytePos to a CharPos relative to the filemap.
458 fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
459 debug!("codemap: converting {:?} to char pos", bpos);
460 let idx = self.lookup_filemap_idx(bpos);
461 let files = self.files.borrow();
462 let map = files.get().get(idx);
464 // The number of extra bytes due to multibyte chars in the FileMap
465 let mut total_extra_bytes = 0;
467 for mbc in map.deref().multibyte_chars.borrow().get().iter() {
468 debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos);
470 // every character is at least one byte, so we only
471 // count the actual extra bytes.
472 total_extra_bytes += mbc.bytes - 1;
473 // We should never see a byte position in the middle of a
// The assertion below requires `bpos` to be at or past the end of the
// multi-byte character (`mbc.pos + mbc.bytes`).
475 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
// Guards the subtraction on the next line against going below zero.
481 assert!(map.deref().start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
// Character offset = byte offset within the filemap minus the extra bytes
// counted above.
482 CharPos(bpos.to_uint() - map.deref().start_pos.to_uint() - total_extra_bytes)
492 let cm = CodeMap::new();
493 let fm = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
494 fm.next_line(BytePos(0));
495 assert_eq!(&fm.get_line(0),&~"first line.");
496 // TESTING BROKEN BEHAVIOR:
497 fm.next_line(BytePos(10));
498 assert_eq!(&fm.get_line(1),&~".");
504 let cm = CodeMap::new();
505 let fm = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
506 // TESTING *REALLY* BROKEN BEHAVIOR:
507 fm.next_line(BytePos(0));
508 fm.next_line(BytePos(10));
509 fm.next_line(BytePos(2));
// Test fixture: a CodeMap holding two identical two-line sources with an
// empty source between them.
512 fn init_code_map() -> CodeMap {
513 let cm = CodeMap::new();
// Each non-empty source is 23 bytes long and has no trailing newline, so
// `new_filemap` appends one and it occupies 24 bytes. The empty filemap
// and the third filemap therefore both start at byte 24.
514 let fm1 = cm.new_filemap(~"blork.rs",~"first line.\nsecond line");
515 let fm2 = cm.new_filemap(~"empty.rs",~"");
516 let fm3 = cm.new_filemap(~"blork2.rs",~"first line.\nsecond line");
// Line starts are registered as absolute CodeMap positions.
518 fm1.next_line(BytePos(0));
519 fm1.next_line(BytePos(12));
520 fm2.next_line(BytePos(24));
521 fm3.next_line(BytePos(24));
// NOTE(review): by the arithmetic above the second line of blork2.rs
// would begin at 24 + 12 = 36, not 34 -- confirm whether 34 is intended.
522 fm3.next_line(BytePos(34));
529 // Test lookup_byte_offset
530 let cm = init_code_map();
532 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
533 assert_eq!(fmabp1.fm.name, ~"blork.rs");
534 assert_eq!(fmabp1.pos, BytePos(22));
536 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
537 assert_eq!(fmabp2.fm.name, ~"blork2.rs");
538 assert_eq!(fmabp2.pos, BytePos(0));
543 // Test bytepos_to_file_charpos
544 let cm = init_code_map();
546 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
547 assert_eq!(cp1, CharPos(22));
549 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
550 assert_eq!(cp2, CharPos(0));
555 // Test zero-length filemaps.
556 let cm = init_code_map();
558 let loc1 = cm.lookup_char_pos(BytePos(22));
559 assert_eq!(loc1.file.name, ~"blork.rs");
560 assert_eq!(loc1.line, 2);
561 assert_eq!(loc1.col, CharPos(10));
563 let loc2 = cm.lookup_char_pos(BytePos(24));
564 assert_eq!(loc2.file.name, ~"blork2.rs");
565 assert_eq!(loc2.line, 1);
566 assert_eq!(loc2.col, CharPos(0));
// Test fixture: a CodeMap with two filemaps whose sources contain
// multi-byte characters, with line starts and multi-byte characters
// registered by hand.
569 fn init_code_map_mbc() -> CodeMap {
570 let cm = CodeMap::new();
571 // € is a three byte utf8 char.
572 let fm1 = cm.new_filemap(~"blork.rs",~"fir€st €€€€ line.\nsecond line");
573 let fm2 = cm.new_filemap(~"blork2.rs",~"first line€€.\n€ second line");
// All positions below are absolute CodeMap byte positions; the second
// filemap's positions start at 40.
// NOTE(review): counting bytes in the first source, its '\n' sits at
// byte 27, so the second line would begin at 28 rather than 22 -- confirm.
575 fm1.next_line(BytePos(0));
576 fm1.next_line(BytePos(22));
577 fm2.next_line(BytePos(40));
578 fm2.next_line(BytePos(58));
// One entry per € character, each three bytes long.
580 fm1.record_multibyte_char(BytePos(3), 3);
581 fm1.record_multibyte_char(BytePos(9), 3);
582 fm1.record_multibyte_char(BytePos(12), 3);
583 fm1.record_multibyte_char(BytePos(15), 3);
584 fm1.record_multibyte_char(BytePos(18), 3);
585 fm2.record_multibyte_char(BytePos(50), 3);
586 fm2.record_multibyte_char(BytePos(53), 3);
587 fm2.record_multibyte_char(BytePos(58), 3);
594 // Test bytepos_to_file_charpos in the presence of multi-byte chars
595 let cm = init_code_map_mbc();
597 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
598 assert_eq!(cp1, CharPos(3));
600 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
601 assert_eq!(cp2, CharPos(4));
603 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
604 assert_eq!(cp3, CharPos(12));
606 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
607 assert_eq!(cp4, CharPos(15));
612 // Test span_to_lines for a span ending at the end of filemap
613 let cm = init_code_map();
614 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
615 let file_lines = cm.span_to_lines(span);
617 assert_eq!(file_lines.file.name, ~"blork.rs");
618 assert_eq!(file_lines.lines.len(), 1);
619 assert_eq!(*file_lines.lines.get(0), 1u);
624 // Test span_to_snippet for a span ending at the end of filemap
625 let cm = init_code_map();
626 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
627 let snippet = cm.span_to_snippet(span);
629 assert_eq!(snippet, Some(~"second line"));
634 // Test span_to_str for a span ending at the end of filemap
635 let cm = init_code_map();
636 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
637 let sstr = cm.span_to_str(span);
639 assert_eq!(sstr, ~"blork.rs:2:1: 2:12");