1. make /// ... and //! ... and /** ... */ and /*! ... */ into sugar for #[doc = ...] attributes.
2. add a script in etc/ to help convert doc-attributes to doc-comments
3. add some functions to core::str to help with (1)
--- /dev/null
+#!/usr/bin/python
+
+#
+# this script attempts to turn doc comment attributes (#[doc = "..."])
+# into sugared-doc-comments (/** ... */ and /// ...)
+#
+# it sugarises all .rs/.rc files underneath the working directory
+#
+
+import sys, os, fnmatch, re
+
+
+DOC_PATTERN = '^(?P<indent>[\\t ]*)#\\[(\\s*)doc(\\s*)=' + \
+ '(\\s*)"(?P<text>(\\"|[^"])*?)"(\\s*)\\]' + \
+ '(?P<semi>;)?'
+
+# (escaped form, literal character) pairs that unescape() substitutes when
+# decoding the text captured from a doc attribute's string literal.
+ESCAPES = [("\\'", "'"),
+ ('\\"', '"'),
+ ("\\n", "\n"),
+ ("\\r", "\r"),
+ ("\\t", "\t")]
+
+
+def unescape(s):
+ for (find, repl) in ESCAPES:
+ s = s.replace(find, repl)
+ return s
+
+
+def block_trim(s):
+ lns = s.splitlines()
+
+ # remove leading/trailing whitespace-lines
+ while lns and not lns[0].strip():
+ lns = lns[1:]
+ while lns and not lns[-1].strip():
+ lns = lns[:-1]
+
+ # remove leading horizontal whitespace
+ n = sys.maxint
+ for ln in lns:
+ if ln.strip():
+ n = min(n, len(re.search('^\s*', ln).group()))
+ if n != sys.maxint:
+ lns = [ln[n:] for ln in lns]
+
+ # strip trailing whitespace
+ lns = [ln.rstrip() for ln in lns]
+
+ return lns
+
+
+def replace_doc(m):
+ indent = m.group('indent')
+ text = block_trim(unescape(m.group('text')))
+
+ if len(text) > 1:
+ inner = '!' if m.group('semi') else '*'
+ starify = lambda s: indent + ' *' + (' ' + s if s else '')
+ text = '\n'.join(map(starify, text))
+ repl = indent + '/*' + inner + '\n' + text + '\n' + indent + ' */'
+ else:
+ inner = '!' if m.group('semi') else '/'
+ repl = indent + '//' + inner + ' ' + text[0]
+
+ return repl
+
+
+def sugarise_file(path):
+ s = open(path).read()
+
+ r = re.compile(DOC_PATTERN, re.MULTILINE | re.DOTALL)
+ ns = re.sub(r, replace_doc, s)
+
+ if s != ns:
+ open(path, 'w').write(ns)
+
+
+# Script entry point: walk the tree below the current working directory and
+# sugarise every file whose name ends in .rs or .rc.
+for (dirpath, dirnames, filenames) in os.walk('.'):
+ for name in fnmatch.filter(filenames, '*.r[sc]'):
+ sugarise_file(os.path.join(dirpath, name))
+
all, any,
all_between, any_between,
map,
- each,
- each_char,
+ each, eachi,
+ each_char, each_chari,
bytes_iter,
chars_iter,
split_char_iter,
find_char, find_char_from, find_char_between,
rfind_char, rfind_char_from, rfind_char_between,
find_str, find_str_from, find_str_between,
- contains,
+ contains, contains_char,
starts_with,
ends_with,
#[doc = "Iterate over the bytes in a string"]
#[inline(always)]
pure fn each(s: str/&, it: fn(u8) -> bool) {
+ eachi(s, {|_i, b| it(b)})
+}
+
+#[doc = "Iterate over the bytes in a string, with indices"]
+#[inline(always)]
+pure fn eachi(s: str/&, it: fn(uint, u8) -> bool) {
let mut i = 0u, l = len(s);
while (i < l) {
- if !it(s[i]) { break; }
+ if !it(i, s[i]) { break; }
i += 1u;
}
}
#[doc = "Iterates over the chars in a string"]
#[inline(always)]
pure fn each_char(s: str/&, it: fn(char) -> bool) {
- let mut pos = 0u;
+ each_chari(s, {|_i, c| it(c)})
+}
+
+#[doc = "Iterates over the chars in a string, with indices"]
+#[inline(always)]
+pure fn each_chari(s: str/&, it: fn(uint, char) -> bool) {
+ let mut pos = 0u, ch_pos = 0u;
let len = len(s);
while pos < len {
let {ch, next} = char_range_at(s, pos);
pos = next;
- if !it(ch) { break; }
+ if !it(ch_pos, ch) { break; }
+ ch_pos += 1u;
}
}
option::is_some(find_str(haystack, needle))
}
+#[doc = "
+Returns true if a string contains a char.
+
+# Arguments
+
+* haystack - The string to look in
+* needle - The char to look for
+"]
+pure fn contains_char(haystack: str/&, needle: char) -> bool {
+    // the char is present exactly when find_char reports a position for it
+    let position = find_char(haystack, needle);
+    option::is_some(position)
+}
+
#[doc = "
Returns true if one string starts with another
#[doc = "Returns true if one string contains another"]
#[inline]
fn contains(needle: str/&a) -> bool { contains(self, needle) }
+ #[doc = "Returns true if a string contains a char"]
+ #[inline]
+ fn contains_char(needle: char) -> bool { contains_char(self, needle) }
#[doc = "Iterate over the bytes in a string"]
#[inline]
fn each(it: fn(u8) -> bool) { each(self, it) }
+ #[doc = "Iterate over the bytes in a string, with indices"]
+ #[inline]
+ fn eachi(it: fn(uint, u8) -> bool) { eachi(self, it) }
#[doc = "Iterate over the chars in a string"]
#[inline]
fn each_char(it: fn(char) -> bool) { each_char(self, it) }
+ #[doc = "Iterate over the chars in a string, with indices"]
+ #[inline]
+ fn each_chari(it: fn(uint, char) -> bool) { each_chari(self, it) }
#[doc = "Returns true if one string ends with another"]
#[inline]
fn ends_with(needle: str/&) -> bool { ends_with(self, needle) }
assert !contains(data, "ไท华");
}
+    #[test]
+    fn test_contains_char() {
+        assert contains_char("abc", 'b');
+        assert contains_char("a", 'a');
+        assert !contains_char("abc", 'd');
+        assert !contains_char("", 'a');
+
+        // multi-byte chars must be found as whole chars, not as bytes
+        assert contains_char("ไท华", '华');
+        assert contains_char("ไท华", 'ไ');
+        assert !contains_char("ไท华", 'a');
+    }
+
#[test]
fn test_chars_iter() {
let mut i = 0;
#[auto_serialize]
enum attr_style { attr_outer, attr_inner, }
+// doc-comments are promoted to attributes that have is_sugared_doc = true
#[auto_serialize]
-type attribute_ = {style: attr_style, value: meta_item};
+type attribute_ = {style: attr_style, value: meta_item, is_sugared_doc: bool};
/*
iface_refs appear in both impls and in classes that implement ifaces.
import std::map::hashmap;
import either::either;
import diagnostic::span_handler;
-import ast_util::dummy_spanned;
+import ast_util::{spanned, dummy_spanned};
+import parse::comments::{doc_comment_style, strip_doc_comment_decoration};
// Constructors
export mk_name_value_item_str;
export mk_list_item;
export mk_word_item;
export mk_attr;
+export mk_sugared_doc_attr;
// Conversion
export attr_meta;
export attr_metas;
+export desugar_doc_attr;
// Accessors
export get_attr_name;
}
fn mk_attr(item: @ast::meta_item) -> ast::attribute {
- ret dummy_spanned({style: ast::attr_inner, value: *item});
+ ret dummy_spanned({style: ast::attr_inner, value: *item,
+ is_sugared_doc: false});
}
+// Build the attribute that a doc-comment token is promoted to: a `doc`
+// name/value item holding the raw comment text, spanning lo..hi, flagged as
+// sugared, and styled inner or outer according to the comment's opener.
+fn mk_sugared_doc_attr(text: str, lo: uint, hi: uint) -> ast::attribute {
+    let lit = spanned(lo, hi, ast::lit_str(@text));
+    let style = doc_comment_style(text);
+    let value = spanned(lo, hi, ast::meta_name_value(@"doc", lit));
+    ret spanned(lo, hi, {style: style, value: value, is_sugared_doc: true});
+}
/* Conversion */
ret mitems;
}
+// Convert a sugared doc attribute into a plain `doc` attribute whose value
+// has had the comment decoration stripped; other attributes pass through.
+fn desugar_doc_attr(attr: ast::attribute) -> ast::attribute {
+    if !attr.node.is_sugared_doc {
+        ret attr;
+    }
+
+    let raw = get_meta_item_value_str(@attr.node.value).get();
+    let stripped = strip_doc_comment_decoration(*raw);
+    ret mk_attr(mk_name_value_item_str(@"doc", stripped));
+}
/* Accessors */
fn fold_attribute_(at: attribute, fld: ast_fold) ->
attribute {
ret {node: {style: at.node.style,
- value: *fold_meta_item_(@at.node.value, fld)},
+ value: *fold_meta_item_(@at.node.value, fld),
+ is_sugared_doc: at.node.is_sugared_doc },
span: fld.new_span(at.span)};
}
//used in noop_fold_foreign_item and noop_fold_fn_decl
-> attr_or_ext
{
let expect_item_next = vec::is_not_empty(first_item_attrs);
- if self.token == token::POUND {
+ alt self.token {
+ token::POUND {
let lo = self.span.lo;
if self.look_ahead(1u) == token::LBRACKET {
self.bump();
self.bump();
ret some(right(self.parse_syntax_ext_naked(lo)));
} else { ret none; }
- } else { ret none; }
+ }
+ token::DOC_COMMENT(_) {
+ ret some(left(self.parse_outer_attributes()));
+ }
+ _ {
+ ret none;
+ }
+ }
}
// Parse attributes that appear before an item
fn parse_outer_attributes() -> [ast::attribute]/~ {
let mut attrs: [ast::attribute]/~ = []/~;
- while self.token == token::POUND
- && self.look_ahead(1u) == token::LBRACKET {
- vec::push(attrs, self.parse_attribute(ast::attr_outer));
+ loop {
+ alt copy self.token {
+ token::POUND {
+ if self.look_ahead(1u) != token::LBRACKET {
+ break;
+ }
+ attrs += [self.parse_attribute(ast::attr_outer)]/~;
+ }
+ token::DOC_COMMENT(s) {
+ let attr = ::attr::mk_sugared_doc_attr(
+ *self.get_str(s), self.span.lo, self.span.hi);
+ if attr.node.style != ast::attr_outer {
+ self.fatal("expected outer comment");
+ }
+ attrs += [attr]/~;
+ self.bump();
+ }
+ _ {
+ break;
+ }
+ }
}
ret attrs;
}
let meta_item = self.parse_meta_item();
self.expect(token::RBRACKET);
let mut hi = self.span.hi;
- ret spanned(lo, hi, {style: style, value: *meta_item});
+ ret spanned(lo, hi, {style: style, value: *meta_item,
+ is_sugared_doc: false});
}
// Parse attributes that appear after the opening of an item, each
{inner: [ast::attribute]/~, next: [ast::attribute]/~} {
let mut inner_attrs: [ast::attribute]/~ = []/~;
let mut next_outer_attrs: [ast::attribute]/~ = []/~;
- while self.token == token::POUND {
- if self.look_ahead(1u) != token::LBRACKET {
- // This is an extension
- break;
- }
- let attr = self.parse_attribute(ast::attr_inner);
- if self.token == token::SEMI {
+ loop {
+ alt copy self.token {
+ token::POUND {
+ if self.look_ahead(1u) != token::LBRACKET {
+ // This is an extension
+ break;
+ }
+ let attr = self.parse_attribute(ast::attr_inner);
+ if self.token == token::SEMI {
+ self.bump();
+ inner_attrs += [attr]/~;
+ } else {
+ // It's not really an inner attribute
+ let outer_attr =
+ spanned(attr.span.lo, attr.span.hi,
+ {style: ast::attr_outer, value: attr.node.value,
+ is_sugared_doc: false});
+ next_outer_attrs += [outer_attr]/~;
+ break;
+ }
+ }
+ token::DOC_COMMENT(s) {
+ let attr = ::attr::mk_sugared_doc_attr(
+ *self.get_str(s), self.span.lo, self.span.hi);
self.bump();
- vec::push(inner_attrs, attr);
- } else {
- // It's not really an inner attribute
- let outer_attr =
- spanned(attr.span.lo, attr.span.hi,
- {style: ast::attr_outer, value: attr.node.value});
- vec::push(next_outer_attrs, outer_attr);
+ if attr.node.style == ast::attr_inner {
+ inner_attrs += [attr]/~;
+ } else {
+ next_outer_attrs += [attr]/~;
+ break;
+ }
+ }
+ _ {
break;
+ }
}
}
ret {inner: inner_attrs, next: next_outer_attrs};
export lit;
export cmnt_style;
export gather_comments_and_literals;
+export is_doc_comment, doc_comment_style, strip_doc_comment_decoration;
enum cmnt_style {
isolated, // No code on either side of each line of the comment
type cmnt = {style: cmnt_style, lines: [str]/~, pos: uint};
+// True if s starts with one of the four doc-comment openers.
+fn is_doc_comment(s: str) -> bool {
+    // `///` and `//!` open line doc-comments, `/**` and `/*!` block ones
+    let line_doc = s.starts_with("///") || s.starts_with("//!");
+    let block_doc = s.starts_with("/**") || s.starts_with("/*!");
+    ret line_doc || block_doc;
+}
+
+// Classify a doc-comment as an inner or outer attribute.  Fails the assert
+// if comment is not a doc-comment at all.
+fn doc_comment_style(comment: str) -> ast::attr_style {
+    assert is_doc_comment(comment);
+    // a `!` as the third character marks an inner doc-comment
+    let is_inner = comment.starts_with("//!") || comment.starts_with("/*!");
+    ret if is_inner { ast::attr_inner } else { ast::attr_outer };
+}
+
+// Extract the documentation text from a doc-comment by removing the comment
+// markers.  For a line comment the first three chars are cut and the rest is
+// trimmed.  For a block comment the opener and closer are cut, blank lines at
+// both ends are dropped, and then the common leading whitespace, one leading
+// column of `*`, and the common leading whitespace again are removed.
+// Fails if comment starts with neither `//` nor `/*`.
+fn strip_doc_comment_decoration(comment: str) -> str {
+
+    /// remove whitespace-only lines from the start/end of the line vector
+    fn vertical_trim(lines: [str]/~) -> [str]/~ {
+        let mut i = 0u, j = lines.len();
+        while i < j && lines[i].trim().is_empty() {
+            i += 1u;
+        }
+        while j > i && lines[j - 1u].trim().is_empty() {
+            j -= 1u;
+        }
+        ret lines.slice(i, j);
+    }
+
+    // drop leftmost columns that contain only values in chars; at most max
+    // columns are dropped when max is given, and whitespace-only lines do
+    // not limit how many columns are dropped
+    fn block_trim(lines: [str]/~, chars: str, max: option<uint>) -> [str]/~ {
+
+        let mut i = max.get_default(uint::max_value);
+        for lines.each {|line|
+            if line.trim().is_empty() {
+                cont;
+            }
+            for line.each_chari {|j, c|
+                if j >= i {
+                    break;
+                }
+                if !chars.contains_char(c) {
+                    i = j;
+                    break;
+                }
+            }
+        }
+
+        ret lines.map {|line|
+            let chars = str::chars(line);
+            if i > chars.len() {
+                ""
+            } else {
+                str::from_chars(chars.slice(i, chars.len()))
+            }
+        };
+    }
+
+    if comment.starts_with("//") {
+        ret comment.slice(3u, comment.len()).trim();
+    }
+
+    if comment.starts_with("/*") {
+        // "/**/" is lexed as a block doc-comment, but it is too short to
+        // hold both a 3-char opener and a 2-char closer: slicing it would
+        // ask for the range 3..2.  It carries no text, so return none.
+        if comment.len() < 5u {
+            ret "";
+        }
+        let lines = str::lines_any(comment.slice(3u, comment.len() - 2u));
+        let lines = vertical_trim(lines);
+        let lines = block_trim(lines, "\t ", none);
+        let lines = block_trim(lines, "*", some(1u));
+        let lines = block_trim(lines, "\t ", none);
+        ret str::connect(lines, "\n");
+    }
+
+    fail "not a doc-comment: " + comment;
+}
+
fn read_to_eol(rdr: string_reader) -> str {
let mut val = "";
while rdr.curr != '\n' && !is_eof(rdr) {
}
}
-fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
+
+fn read_shebang_comment(rdr: string_reader, code_to_the_left: bool,
+ &comments: [cmnt]/~) {
#debug(">>> shebang comment");
let p = rdr.chpos;
#debug("<<< shebang comment");
- ret {style: if code_to_the_left { trailing } else { isolated },
- lines: [read_one_line_comment(rdr)]/~,
- pos: p};
+ vec::push(comments, {
+ style: if code_to_the_left { trailing } else { isolated },
+ lines: [read_one_line_comment(rdr)]/~,
+ pos: p
+ });
}
-fn read_line_comments(rdr: string_reader, code_to_the_left: bool) -> cmnt {
+fn read_line_comments(rdr: string_reader, code_to_the_left: bool,
+ &comments: [cmnt]/~) {
#debug(">>> line comments");
let p = rdr.chpos;
let mut lines: [str]/~ = []/~;
while rdr.curr == '/' && nextch(rdr) == '/' {
let line = read_one_line_comment(rdr);
log(debug, line);
+ if is_doc_comment(line) { // doc-comments are not put in comments
+ break;
+ }
vec::push(lines, line);
consume_non_eol_whitespace(rdr);
}
#debug("<<< line comments");
- ret {style: if code_to_the_left { trailing } else { isolated },
- lines: lines,
- pos: p};
+ if !lines.is_empty() {
+ vec::push(comments, {
+ style: if code_to_the_left { trailing } else { isolated },
+ lines: lines,
+ pos: p
+ });
+ }
}
fn all_whitespace(s: str, begin: uint, end: uint) -> bool {
vec::push(lines, s1);
}
-fn read_block_comment(rdr: string_reader, code_to_the_left: bool) -> cmnt {
+fn read_block_comment(rdr: string_reader, code_to_the_left: bool,
+ &comments: [cmnt]/~) {
#debug(">>> block comment");
let p = rdr.chpos;
let mut lines: [str]/~ = []/~;
let mut col: uint = rdr.col;
bump(rdr);
bump(rdr);
+
+ // doc-comments are not really comments, they are attributes
+ if rdr.curr == '*' || rdr.curr == '!' {
+ while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
+ bump(rdr);
+ }
+ if !is_eof(rdr) {
+ bump(rdr);
+ bump(rdr);
+ }
+ ret;
+ }
+
let mut curr_line = "/*";
let mut level: int = 1;
while level > 0 {
style = mixed;
}
#debug("<<< block comment");
- ret {style: style, lines: lines, pos: p};
+ vec::push(comments, {style: style, lines: lines, pos: p});
}
fn peeking_at_comment(rdr: string_reader) -> bool {
&comments: [cmnt]/~) {
#debug(">>> consume comment");
if rdr.curr == '/' && nextch(rdr) == '/' {
- vec::push(comments, read_line_comments(rdr, code_to_the_left));
+ read_line_comments(rdr, code_to_the_left, comments);
} else if rdr.curr == '/' && nextch(rdr) == '*' {
- vec::push(comments, read_block_comment(rdr, code_to_the_left));
+ read_block_comment(rdr, code_to_the_left, comments);
} else if rdr.curr == '#' && nextch(rdr) == '!' {
- vec::push(comments, read_shebang_comment(rdr, code_to_the_left));
+ read_shebang_comment(rdr, code_to_the_left, comments);
} else { fail; }
#debug("<<< consume comment");
}
}
fn string_advance_token(&&r: string_reader) {
- consume_whitespace_and_comments(r);
+ for consume_whitespace_and_comments(r).each {|comment|
+ r.peek_tok = comment.tok;
+ r.peek_span = comment.sp;
+ ret;
+ }
if is_eof(r) {
r.peek_tok = token::EOF;
fn is_bin_digit(c: char) -> bool { ret c == '0' || c == '1'; }
-fn consume_whitespace_and_comments(rdr: string_reader) {
+// might return a sugared-doc-attr
+fn consume_whitespace_and_comments(rdr: string_reader)
+ -> option<{tok: token::token, sp: span}> {
while is_whitespace(rdr.curr) { bump(rdr); }
ret consume_any_line_comment(rdr);
}
-fn consume_any_line_comment(rdr: string_reader) {
+// might return a sugared-doc-attr
+fn consume_any_line_comment(rdr: string_reader)
+ -> option<{tok: token::token, sp: span}> {
if rdr.curr == '/' {
alt nextch(rdr) {
'/' {
- while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
- // Restart whitespace munch.
-
- ret consume_whitespace_and_comments(rdr);
+ bump(rdr);
+ bump(rdr);
+ // line comments starting with "///" or "//!" are doc-comments
+ if rdr.curr == '/' || rdr.curr == '!' {
+ let start_chpos = rdr.chpos - 2u;
+ let mut acc = "//";
+ while rdr.curr != '\n' && !is_eof(rdr) {
+ str::push_char(acc, rdr.curr);
+ bump(rdr);
+ }
+ ret some({
+ tok: token::DOC_COMMENT(intern(*rdr.interner, @acc)),
+ sp: ast_util::mk_sp(start_chpos, rdr.chpos)
+ });
+ } else {
+ while rdr.curr != '\n' && !is_eof(rdr) { bump(rdr); }
+ // Restart whitespace munch.
+ ret consume_whitespace_and_comments(rdr);
+ }
}
'*' { bump(rdr); bump(rdr); ret consume_block_comment(rdr); }
- _ { ret; }
+ _ {}
}
} else if rdr.curr == '#' {
if nextch(rdr) == '!' {
}
}
}
+ ret none;
}
-fn consume_block_comment(rdr: string_reader) {
+// might return a sugared-doc-attr
+fn consume_block_comment(rdr: string_reader)
+ -> option<{tok: token::token, sp: span}> {
+
+ // block comments starting with "/**" or "/*!" are doc-comments
+ if rdr.curr == '*' || rdr.curr == '!' {
+ let start_chpos = rdr.chpos - 2u;
+ let mut acc = "/*";
+ while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) {
+ str::push_char(acc, rdr.curr);
+ bump(rdr);
+ }
+ if is_eof(rdr) {
+ rdr.fatal("unterminated block doc-comment");
+ } else {
+ acc += "*/";
+ bump(rdr);
+ bump(rdr);
+ ret some({
+ tok: token::DOC_COMMENT(intern(*rdr.interner, @acc)),
+ sp: ast_util::mk_sp(start_chpos, rdr.chpos)
+ });
+ }
+ }
+
let mut level: int = 1;
while level > 0 {
if is_eof(rdr) { rdr.fatal("unterminated block comment"); }
//ACTUALLY(whole_nonterminal),
+ DOC_COMMENT(str_num),
EOF,
}
+ str::escape_default(*interner::get(in, s))
+ "\""
}
+
/* Name components */
IDENT(s, _) {
*interner::get(in, s)
}
UNDERSCORE { "_" }
+
+ /* Other */
+ DOC_COMMENT(s) { *interner::get(in, s) }
EOF { "<eof>" }
}
}
alt attr.node.style {
ast::attr_inner {
print_attribute(s, attr);
- word(s.s, ";");
+ if !attr.node.is_sugared_doc {
+ word(s.s, ";");
+ }
count += 1;
}
_ {/* fallthrough */ }
fn print_attribute(s: ps, attr: ast::attribute) {
hardbreak_if_not_bol(s);
maybe_print_comment(s, attr.span.lo);
- word(s.s, "#[");
- print_meta_item(s, @attr.node.value);
- word(s.s, "]");
+ if attr.node.is_sugared_doc {
+ let meta = attr::attr_meta(attr);
+ let comment = attr::get_meta_item_value_str(meta).get();
+ word(s.s, *comment);
+ } else {
+ word(s.s, "#[");
+ print_meta_item(s, @attr.node.value);
+ word(s.s, "]");
+ }
}
ast::meta_name_value(
@"crate_type",
ast_util::respan(ast_util::dummy_sp(),
- ast::lit_str(@t))))
+ ast::lit_str(@t)))),
+ is_sugared_doc: false
})
}
assert (vec::len(meta_items) == 1u);
let meta_item = meta_items[0];
vec::push(attrs,
- {node: {style: ast::attr_outer, value: *meta_item},
+ {node: {style: ast::attr_outer, value: *meta_item,
+ is_sugared_doc: false},
span: ast_util::dummy_sp()});
};
}
doc attribute"];
let doc_attrs = attr::find_attrs_by_name(attrs, "doc");
- let doc_metas = attr::attr_metas(doc_attrs);
+ let doc_metas = doc_attrs.map {|attr|
+ attr::attr_meta(attr::desugar_doc_attr(attr))
+ };
+
if vec::is_not_empty(doc_metas) {
if vec::len(doc_metas) != 1u {
#warn("ignoring %u doc attributes", vec::len(doc_metas) - 1u);
--- /dev/null
+// pp-exact
+
+// some single-line non-doc comment
+
+/// some single line outer-docs
+fn a() { }
+
+fn b() {
+ //! some single line inner-docs
+}
+
+/*
+ * some multi-line non-doc comment
+ */
+
+/**
+ * some multi-line outer-docs
+ */
+fn c() { }
+
+fn d() {
+ /*!
+ * some multi-line inner-docs
+ */
+}
+
+#[doc = "unsugared outer doc-comments work also"]
+fn e() { }
+
+fn f() {
+ #[doc = "as do inner ones"];
+}