1 // Copyright 2012 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 The CodeMap tracks all the source code used within a single crate, mapping
14 from integer byte positions to the original source code location. Each bit of
15 source parsed during crate parsing (typically files, in-memory strings, or
16 various bits of macro expansion) covers a continuous range of bytes in the
17 CodeMap and is represented by a FileMap. Byte positions are stored in `spans`
18 and used pervasively in the compiler. They are absolute positions within the
19 CodeMap, which upon request can be converted to line and column information,
20 source code snippets, etc.
24 use serialize::{Encodable, Decodable, Encoder, Decoder};
25 use std::cell::RefCell;
27 use std::string::String;
30 fn from_uint(n: uint) -> Self;
31 fn to_uint(&self) -> uint;
34 /// A byte offset. Keep this small (currently 32 bits), as the AST contains many of them.
36 #[deriving(Clone, Eq, TotalEq, Hash, Ord, Show)]
37 pub struct BytePos(pub u32);
39 /// A character offset. Because of multibyte utf8 characters, a byte offset
40 /// is not equivalent to a character offset. The CodeMap will convert BytePos
41 /// values to CharPos values as necessary.
42 #[deriving(Eq, Hash, Ord, Show)]
43 pub struct CharPos(pub uint);
45 // FIXME: Lots of boilerplate in these impls, but so far my attempts to fix
46 // it have been unsuccessful
48 impl Pos for BytePos {
49 fn from_uint(n: uint) -> BytePos { BytePos(n as u32) }
50 fn to_uint(&self) -> uint { let BytePos(n) = *self; n as uint }
53 impl Add<BytePos, BytePos> for BytePos {
54 fn add(&self, rhs: &BytePos) -> BytePos {
55 BytePos((self.to_uint() + rhs.to_uint()) as u32)
59 impl Sub<BytePos, BytePos> for BytePos {
60 fn sub(&self, rhs: &BytePos) -> BytePos {
61 BytePos((self.to_uint() - rhs.to_uint()) as u32)
65 impl Pos for CharPos {
66 fn from_uint(n: uint) -> CharPos { CharPos(n) }
67 fn to_uint(&self) -> uint { let CharPos(n) = *self; n }
70 impl Add<CharPos,CharPos> for CharPos {
71 fn add(&self, rhs: &CharPos) -> CharPos {
72 CharPos(self.to_uint() + rhs.to_uint())
76 impl Sub<CharPos,CharPos> for CharPos {
77 fn sub(&self, rhs: &CharPos) -> CharPos {
78 CharPos(self.to_uint() - rhs.to_uint())
83 Spans represent a region of code, used for error reporting. Positions in spans
84 are *absolute* positions from the beginning of the codemap, not positions
85 relative to FileMaps. Methods on the CodeMap can be used to relate spans back
86 to the original source.
88 #[deriving(Clone, Show, Hash)]
92 /// Information about where the macro came from, if this piece of
93 /// code was created by a macro expansion.
94 pub expn_info: Option<@ExpnInfo>
97 pub static DUMMY_SP: Span = Span { lo: BytePos(0), hi: BytePos(0), expn_info: None };
99 #[deriving(Clone, Eq, TotalEq, Encodable, Decodable, Hash)]
100 pub struct Spanned<T> {
106 fn eq(&self, other: &Span) -> bool {
107 return (*self).lo == (*other).lo && (*self).hi == (*other).hi;
109 fn ne(&self, other: &Span) -> bool { !(*self).eq(other) }
112 impl TotalEq for Span {}
114 impl<S:Encoder<E>, E> Encodable<S, E> for Span {
115 /* Note #1972 -- spans are encoded but not decoded */
116 fn encode(&self, s: &mut S) -> Result<(), E> {
121 impl<D:Decoder<E>, E> Decodable<D, E> for Span {
122 fn decode(_d: &mut D) -> Result<Span, E> {
127 pub fn spanned<T>(lo: BytePos, hi: BytePos, t: T) -> Spanned<T> {
128 respan(mk_sp(lo, hi), t)
131 pub fn respan<T>(sp: Span, t: T) -> Spanned<T> {
132 Spanned {node: t, span: sp}
135 pub fn dummy_spanned<T>(t: T) -> Spanned<T> {
139 /* assuming that we're not in macro expansion */
140 pub fn mk_sp(lo: BytePos, hi: BytePos) -> Span {
141 Span {lo: lo, hi: hi, expn_info: None}
144 /// Return the span itself if it doesn't come from a macro expansion,
145 /// otherwise return the call site span up to the `enclosing_sp` by
146 /// following the `expn_info` chain.
147 pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
148 match (sp.expn_info, enclosing_sp.expn_info) {
150 (Some(expn1), Some(expn2)) if expn1.call_site == expn2.call_site => sp,
151 (Some(expn1), _) => original_sp(expn1.call_site, enclosing_sp),
155 /// A source code location used for error reporting
157 /// Information about the original source
158 pub file: Rc<FileMap>,
159 /// The (1-based) line number
161 /// The (0-based) column offset
165 /// A source code location used as the result of lookup_char_pos_adj
166 // Actually, *none* of the clients use the filename *or* file field;
167 // perhaps they should just be removed.
168 pub struct LocWithOpt {
169 pub filename: FileName,
172 pub file: Option<Rc<FileMap>>,
175 // used to be structural records. Better names, anyone?
176 pub struct FileMapAndLine { pub fm: Rc<FileMap>, pub line: uint }
177 pub struct FileMapAndBytePos { pub fm: Rc<FileMap>, pub pos: BytePos }
179 /// The syntax with which a macro was invoked.
180 #[deriving(Clone, Hash, Show)]
181 pub enum MacroFormat {
182 /// e.g. #[deriving(...)] <item>
188 #[deriving(Clone, Hash, Show)]
189 pub struct NameAndSpan {
190 /// The name of the macro that was invoked to create the thing
193 /// The format with which the macro was invoked.
194 pub format: MacroFormat,
195 /// The span of the macro definition itself. The macro may not
196 /// have a sensible definition span (e.g. something defined
197 /// completely inside libsyntax) in which case this is None.
198 pub span: Option<Span>
201 /// Extra information for tracking macro expansion of spans
202 #[deriving(Hash, Show)]
203 pub struct ExpnInfo {
204 /// The location of the actual macro invocation, e.g. `let x = foo!();`
207 /// This may recursively refer to other macro invocations, e.g. if
208 /// `foo!()` invoked `bar!()` internally, and there was an
209 /// expression inside `bar!`; the call_site of the expression in
210 /// the expansion would point to the `bar!` invocation; that
211 /// call_site span would have its own ExpnInfo, with the call_site
212 /// pointing to the `foo!` invocation.
214 /// Information about the macro and its definition.
216 /// The `callee` of the inner expression in the `call_site`
217 /// example would point to the `macro_rules! bar { ... }` and that
218 /// of the `bar!()` invocation would point to the `macro_rules! foo { ... }`.
220 pub callee: NameAndSpan
223 pub type FileName = String;
225 pub struct FileLines {
226 pub file: Rc<FileMap>,
230 /// Identifies an offset of a multi-byte character in a FileMap
231 pub struct MultiByteChar {
232 /// The absolute offset of the character in the CodeMap
234 /// The number of bytes, >=2
238 /// A single source in the CodeMap
240 /// The name of the file that the source came from. Source that doesn't
241 /// originate from a file has a name between angle brackets by convention.
244 /// The complete source code
246 /// The start position of this source in the CodeMap
247 pub start_pos: BytePos,
248 /// Locations of line beginnings in the source code
249 pub lines: RefCell<Vec<BytePos> >,
250 /// Locations of multi-byte characters in the source code
251 pub multibyte_chars: RefCell<Vec<MultiByteChar> >,
255 // EFFECT: register a start-of-line offset in the
256 // table of line-beginnings.
257 // UNCHECKED INVARIANT: these offsets must be added in the right
258 // order and must be in the right places; there is shared knowledge
259 // about what ends a line between this file and parse.rs
260 // WARNING: pos param here is the offset relative to start of CodeMap,
261 // and CodeMap will append a newline when adding a filemap without a newline at the end,
262 // so the safe way to call this is with value calculated as
263 // filemap.start_pos + newline_offset_relative_to_the_start_of_filemap.
264 pub fn next_line(&self, pos: BytePos) {
265 // the new byte position must be > the last one (or it's the first one).
266 let mut lines = self.lines.borrow_mut();;
267 let line_len = lines.len();
268 assert!(line_len == 0 || (*lines.get(line_len - 1) < pos))
272 // get a line from the list of pre-computed line-beginnings
273 pub fn get_line(&self, line: int) -> String {
274 let mut lines = self.lines.borrow_mut();
275 let begin: BytePos = *lines.get(line as uint) - self.start_pos;
276 let begin = begin.to_uint();
277 let slice = self.src.as_slice().slice_from(begin);
278 match slice.find('\n') {
279 Some(e) => slice.slice_to(e).to_strbuf(),
280 None => slice.to_strbuf()
284 pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
285 assert!(bytes >=2 && bytes <= 4);
286 let mbc = MultiByteChar {
290 self.multibyte_chars.borrow_mut().push(mbc);
293 pub fn is_real_file(&self) -> bool {
294 !(self.name.as_slice().starts_with("<") &&
295 self.name.as_slice().ends_with(">"))
300 pub files: RefCell<Vec<Rc<FileMap>>>
304 pub fn new() -> CodeMap {
306 files: RefCell::new(Vec::new()),
310 pub fn new_filemap(&self, filename: FileName, src: String) -> Rc<FileMap> {
311 let mut files = self.files.borrow_mut();
312 let start_pos = match files.last() {
314 Some(last) => last.start_pos.to_uint() + last.src.len(),
317 // Remove utf-8 BOM if any.
318 // FIXME #12884: no efficient/safe way to remove from the start of a string
319 // and reuse the allocation.
320 let mut src = if src.as_slice().starts_with("\ufeff") {
321 String::from_str(src.as_slice().slice_from(3))
323 String::from_str(src.as_slice())
326 // Append '\n' in case it's not already there.
327 // This is a workaround to prevent CodeMap.lookup_filemap_idx from accidentally
328 // overflowing into the next filemap in case the last byte of span is also the last
329 // byte of filemap, which leads to incorrect results from CodeMap.span_to_*.
330 if src.len() > 0 && !src.as_slice().ends_with("\n") {
334 let filemap = Rc::new(FileMap {
336 src: src.to_strbuf(),
337 start_pos: Pos::from_uint(start_pos),
338 lines: RefCell::new(Vec::new()),
339 multibyte_chars: RefCell::new(Vec::new()),
342 files.push(filemap.clone());
347 pub fn mk_substr_filename(&self, sp: Span) -> String {
348 let pos = self.lookup_char_pos(sp.lo);
349 (format!("<{}:{}:{}>",
352 pos.col.to_uint() + 1)).to_strbuf()
355 /// Lookup source information about a BytePos
356 pub fn lookup_char_pos(&self, pos: BytePos) -> Loc {
360 pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt {
361 let loc = self.lookup_char_pos(pos);
363 filename: loc.file.name.to_strbuf(),
370 pub fn span_to_str(&self, sp: Span) -> String {
371 if self.files.borrow().len() == 0 && sp == DUMMY_SP {
372 return "no-location".to_strbuf();
375 let lo = self.lookup_char_pos_adj(sp.lo);
376 let hi = self.lookup_char_pos_adj(sp.hi);
377 return (format!("{}:{}:{}: {}:{}",
380 lo.col.to_uint() + 1,
382 hi.col.to_uint() + 1)).to_strbuf()
385 pub fn span_to_filename(&self, sp: Span) -> FileName {
386 self.lookup_char_pos(sp.lo).file.name.to_strbuf()
389 pub fn span_to_lines(&self, sp: Span) -> FileLines {
390 let lo = self.lookup_char_pos(sp.lo);
391 let hi = self.lookup_char_pos(sp.hi);
392 let mut lines = Vec::new();
393 for i in range(lo.line - 1u, hi.line as uint) {
396 FileLines {file: lo.file, lines: lines}
399 pub fn span_to_snippet(&self, sp: Span) -> Option<String> {
400 let begin = self.lookup_byte_offset(sp.lo);
401 let end = self.lookup_byte_offset(sp.hi);
403 // FIXME #8256: this used to be an assert but whatever precondition
404 // it's testing isn't true for all spans in the AST, so to allow the
405 // caller to not have to fail (and it can't catch it since the CodeMap
406 // isn't sendable), return None
407 if begin.fm.start_pos != end.fm.start_pos {
410 Some(begin.fm.src.as_slice().slice(begin.pos.to_uint(),
411 end.pos.to_uint()).to_strbuf())
415 pub fn get_filemap(&self, filename: &str) -> Rc<FileMap> {
416 for fm in self.files.borrow().iter() {
417 if filename == fm.name.as_slice() {
421 fail!("asking for {} which we don't know about", filename);
424 fn lookup_filemap_idx(&self, pos: BytePos) -> uint {
425 let files = self.files.borrow();
427 let len = files.len();
431 let m = (a + b) / 2u;
432 if files.get(m).start_pos > pos {
438 // There can be filemaps with length 0. These have the same start_pos as the previous
439 // filemap, but are not the filemaps we want (because they are length 0, they cannot
440 // contain what we are looking for). So, rewind until we find a useful filemap.
442 let lines = files.get(a).lines.borrow();
448 fail!("position {} does not resolve to a source location", pos.to_uint());
453 fail!("position {} does not resolve to a source location", pos.to_uint())
459 fn lookup_line(&self, pos: BytePos) -> FileMapAndLine {
460 let idx = self.lookup_filemap_idx(pos);
462 let files = self.files.borrow();
463 let f = files.get(idx).clone();
466 let mut lines = f.lines.borrow_mut();
467 let mut b = lines.len();
469 let m = (a + b) / 2u;
470 if *lines.get(m) > pos { b = m; } else { a = m; }
473 FileMapAndLine {fm: f, line: a}
476 fn lookup_pos(&self, pos: BytePos) -> Loc {
477 let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos);
478 let line = a + 1u; // Line numbers start at 1
479 let chpos = self.bytepos_to_file_charpos(pos);
480 let linebpos = *f.lines.borrow().get(a);
481 let linechpos = self.bytepos_to_file_charpos(linebpos);
482 debug!("codemap: byte pos {:?} is on the line at byte pos {:?}",
484 debug!("codemap: char pos {:?} is on the line at char pos {:?}",
486 debug!("codemap: byte is on line: {:?}", line);
487 assert!(chpos >= linechpos);
491 col: chpos - linechpos
495 fn lookup_byte_offset(&self, bpos: BytePos) -> FileMapAndBytePos {
496 let idx = self.lookup_filemap_idx(bpos);
497 let fm = self.files.borrow().get(idx).clone();
498 let offset = bpos - fm.start_pos;
499 FileMapAndBytePos {fm: fm, pos: offset}
502 // Converts an absolute BytePos to a CharPos relative to the filemap.
503 fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos {
504 debug!("codemap: converting {:?} to char pos", bpos);
505 let idx = self.lookup_filemap_idx(bpos);
506 let files = self.files.borrow();
507 let map = files.get(idx);
509 // The number of extra bytes due to multibyte chars in the FileMap
510 let mut total_extra_bytes = 0;
512 for mbc in map.multibyte_chars.borrow().iter() {
513 debug!("codemap: {:?}-byte char at {:?}", mbc.bytes, mbc.pos);
515 // every character is at least one byte, so we only
516 // count the actual extra bytes.
517 total_extra_bytes += mbc.bytes - 1;
518 // We should never see a byte position in the middle of a character
520 assert!(bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes);
526 assert!(map.start_pos.to_uint() + total_extra_bytes <= bpos.to_uint());
527 CharPos(bpos.to_uint() - map.start_pos.to_uint() - total_extra_bytes)
537 let cm = CodeMap::new();
538 let fm = cm.new_filemap("blork.rs".to_strbuf(),
539 "first line.\nsecond line".to_strbuf());
540 fm.next_line(BytePos(0));
541 assert_eq!(&fm.get_line(0),&"first line.".to_strbuf());
542 // TESTING BROKEN BEHAVIOR:
543 fm.next_line(BytePos(10));
544 assert_eq!(&fm.get_line(1), &".".to_strbuf());
550 let cm = CodeMap::new();
551 let fm = cm.new_filemap("blork.rs".to_strbuf(),
552 "first line.\nsecond line".to_strbuf());
553 // TESTING *REALLY* BROKEN BEHAVIOR:
554 fm.next_line(BytePos(0));
555 fm.next_line(BytePos(10));
556 fm.next_line(BytePos(2));
559 fn init_code_map() -> CodeMap {
560 let cm = CodeMap::new();
561 let fm1 = cm.new_filemap("blork.rs".to_strbuf(),
562 "first line.\nsecond line".to_strbuf());
563 let fm2 = cm.new_filemap("empty.rs".to_strbuf(),
565 let fm3 = cm.new_filemap("blork2.rs".to_strbuf(),
566 "first line.\nsecond line".to_strbuf());
568 fm1.next_line(BytePos(0));
569 fm1.next_line(BytePos(12));
570 fm2.next_line(BytePos(24));
571 fm3.next_line(BytePos(24));
572 fm3.next_line(BytePos(34));
579 // Test lookup_byte_offset
580 let cm = init_code_map();
582 let fmabp1 = cm.lookup_byte_offset(BytePos(22));
583 assert_eq!(fmabp1.fm.name, "blork.rs".to_strbuf());
584 assert_eq!(fmabp1.pos, BytePos(22));
586 let fmabp2 = cm.lookup_byte_offset(BytePos(24));
587 assert_eq!(fmabp2.fm.name, "blork2.rs".to_strbuf());
588 assert_eq!(fmabp2.pos, BytePos(0));
593 // Test bytepos_to_file_charpos
594 let cm = init_code_map();
596 let cp1 = cm.bytepos_to_file_charpos(BytePos(22));
597 assert_eq!(cp1, CharPos(22));
599 let cp2 = cm.bytepos_to_file_charpos(BytePos(24));
600 assert_eq!(cp2, CharPos(0));
605 // Test zero-length filemaps.
606 let cm = init_code_map();
608 let loc1 = cm.lookup_char_pos(BytePos(22));
609 assert_eq!(loc1.file.name, "blork.rs".to_strbuf());
610 assert_eq!(loc1.line, 2);
611 assert_eq!(loc1.col, CharPos(10));
613 let loc2 = cm.lookup_char_pos(BytePos(24));
614 assert_eq!(loc2.file.name, "blork2.rs".to_strbuf());
615 assert_eq!(loc2.line, 1);
616 assert_eq!(loc2.col, CharPos(0));
619 fn init_code_map_mbc() -> CodeMap {
620 let cm = CodeMap::new();
621 // € is a three byte utf8 char.
623 cm.new_filemap("blork.rs".to_strbuf(),
624 "fir€st €€€€ line.\nsecond line".to_strbuf());
625 let fm2 = cm.new_filemap("blork2.rs".to_strbuf(),
626 "first line€€.\n€ second line".to_strbuf());
628 fm1.next_line(BytePos(0));
629 fm1.next_line(BytePos(22));
630 fm2.next_line(BytePos(40));
631 fm2.next_line(BytePos(58));
633 fm1.record_multibyte_char(BytePos(3), 3);
634 fm1.record_multibyte_char(BytePos(9), 3);
635 fm1.record_multibyte_char(BytePos(12), 3);
636 fm1.record_multibyte_char(BytePos(15), 3);
637 fm1.record_multibyte_char(BytePos(18), 3);
638 fm2.record_multibyte_char(BytePos(50), 3);
639 fm2.record_multibyte_char(BytePos(53), 3);
640 fm2.record_multibyte_char(BytePos(58), 3);
647 // Test bytepos_to_file_charpos in the presence of multi-byte chars
648 let cm = init_code_map_mbc();
650 let cp1 = cm.bytepos_to_file_charpos(BytePos(3));
651 assert_eq!(cp1, CharPos(3));
653 let cp2 = cm.bytepos_to_file_charpos(BytePos(6));
654 assert_eq!(cp2, CharPos(4));
656 let cp3 = cm.bytepos_to_file_charpos(BytePos(56));
657 assert_eq!(cp3, CharPos(12));
659 let cp4 = cm.bytepos_to_file_charpos(BytePos(61));
660 assert_eq!(cp4, CharPos(15));
665 // Test span_to_lines for a span ending at the end of filemap
666 let cm = init_code_map();
667 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
668 let file_lines = cm.span_to_lines(span);
670 assert_eq!(file_lines.file.name, "blork.rs".to_strbuf());
671 assert_eq!(file_lines.lines.len(), 1);
672 assert_eq!(*file_lines.lines.get(0), 1u);
677 // Test span_to_snippet for a span ending at the end of filemap
678 let cm = init_code_map();
679 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
680 let snippet = cm.span_to_snippet(span);
682 assert_eq!(snippet, Some("second line".to_strbuf()));
687 // Test span_to_str for a span ending at the end of filemap
688 let cm = init_code_map();
689 let span = Span {lo: BytePos(12), hi: BytePos(23), expn_info: None};
690 let sstr = cm.span_to_str(span);
692 assert_eq!(sstr, "blork.rs:2:1: 2:12".to_strbuf());