src/tools/clippy/clippy_lints/src/tabs_in_doc_comments.rs

   1 use clippy_utils::diagnostics::span_lint_and_sugg;
   2 use rustc_ast::ast;
   3 use rustc_errors::Applicability;
   4 use rustc_lint::{EarlyContext, EarlyLintPass};
   5 use rustc_session::{declare_lint_pass, declare_tool_lint};
   6 use rustc_span::source_map::{BytePos, Span};
   7
declare_clippy_lint! {
    /// ### What it does
    /// Checks doc comments for usage of tab characters.
    ///
    /// ### Why is this bad?
    /// The Rust style guide promotes spaces instead of tabs for indentation.
    /// To keep the view of the source consistent, doc comments should not contain tabs either.
    /// In addition, explanatory ASCII diagrams that contain tabs can be displayed incorrectly
    /// when the display settings of the author and the reader differ.
    ///
    /// ### Example
    /// ```rust
    /// ///
    /// /// Struct to hold two strings:
    /// ///     - first         one
    /// ///     - second        one
    /// pub struct DoubleString {
    ///    ///
    ///    ///  - First String:
    ///    ///          - needs to be inside here
    ///    first_string: String,
    ///    ///
    ///    ///  - Second String:
    ///    ///          - needs to be inside here
    ///    second_string: String,
    ///}
    /// ```
    ///
    /// Will be converted to:
    /// ```rust
    /// ///
    /// /// Struct to hold two strings:
    /// ///     - first        one
    /// ///     - second    one
    /// pub struct DoubleString {
    ///    ///
    ///    ///     - First String:
    ///    ///         - needs to be inside here
    ///    first_string: String,
    ///    ///
    ///    ///     - Second String:
    ///    ///         - needs to be inside here
    ///    second_string: String,
    ///}
    /// ```
    #[clippy::version = "1.41.0"]
    pub TABS_IN_DOC_COMMENTS,
    style,
    "using tabs in doc comments is not recommended"
}
  58
// Declares the `TabsInDocComments` pass type and associates the `TABS_IN_DOC_COMMENTS` lint
// with it.
declare_lint_pass!(TabsInDocComments => [TABS_IN_DOC_COMMENTS]);
  60
  61 impl TabsInDocComments {
  62     fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
  63         if let ast::AttrKind::DocComment(_, comment) = attr.kind {
  64             let comment = comment.as_str();
  65
  66             for (lo, hi) in get_chunks_of_tabs(comment) {
  67                 // +3 skips the opening delimiter
  68                 let new_span = Span::new(
  69                     attr.span.lo() + BytePos(3 + lo),
  70                     attr.span.lo() + BytePos(3 + hi),
  71                     attr.span.ctxt(),
  72                     attr.span.parent(),
  73                 );
  74                 span_lint_and_sugg(
  75                     cx,
  76                     TABS_IN_DOC_COMMENTS,
  77                     new_span,
  78                     "using tabs in doc comments is not recommended",
  79                     "consider using four spaces per tab",
  80                     "    ".repeat((hi - lo) as usize),
  81                     Applicability::MaybeIncorrect,
  82                 );
  83             }
  84         }
  85     }
  86 }
  87
/// Wires the tab check into the early lint pass via its attribute hook.
impl EarlyLintPass for TabsInDocComments {
    /// Forwards each attribute passed to this hook to `warn_if_tabs_in_doc`, which ignores
    /// everything that is not a doc comment.
    fn check_attribute(&mut self, cx: &EarlyContext<'_>, attribute: &ast::Attribute) {
        Self::warn_if_tabs_in_doc(cx, attribute);
    }
}
  93
  94 ///
  95 /// scans the string for groups of tabs and returns the start(inclusive) and end positions
  96 /// (exclusive) of all groups
  97 /// e.g. "sd\tasd\t\taa" will be converted to [(2, 3), (6, 8)] as
  98 ///       012 3456 7 89
  99 ///         ^-^  ^---^
 100 fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
 101     let line_length_way_to_long = "doc comment longer than 2^32 chars";
 102     let mut spans: Vec<(u32, u32)> = vec![];
 103     let mut current_start: u32 = 0;
 104
 105     // tracker to decide if the last group of tabs is not closed by a non-tab character
 106     let mut is_active = false;
 107
 108     // Note that we specifically need the char _byte_ indices here, not the positional indexes
 109     // within the char array to deal with multi-byte characters properly. `char_indices` does
 110     // exactly that. It provides an iterator over tuples of the form `(byte position, char)`.
 111     let char_indices: Vec<_> = the_str.char_indices().collect();
 112
 113     if let [(_, '\t')] = char_indices.as_slice() {
 114         return vec![(0, 1)];
 115     }
 116
 117     for entry in char_indices.windows(2) {
 118         match entry {
 119             [(_, '\t'), (_, '\t')] => {
 120                 // either string starts with double tab, then we have to set it active,
 121                 // otherwise is_active is true anyway
 122                 is_active = true;
 123             },
 124             [(_, _), (index_b, '\t')] => {
 125                 // as ['\t', '\t'] is excluded, this has to be a start of a tab group,
 126                 // set indices accordingly
 127                 is_active = true;
 128                 current_start = u32::try_from(*index_b).unwrap();
 129             },
 130             [(_, '\t'), (index_b, _)] => {
 131                 // this now has to be an end of the group, hence we have to push a new tuple
 132                 is_active = false;
 133                 spans.push((current_start, u32::try_from(*index_b).unwrap()));
 134             },
 135             _ => {},
 136         }
 137     }
 138
 139     // only possible when tabs are at the end, insert last group
 140     if is_active {
 141         spans.push((
 142             current_start,
 143             u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long),
 144         ));
 145     }
 146
 147     spans
 148 }
 149
 150 #[cfg(test)]
 151 mod tests_for_get_chunks_of_tabs {
 152     use super::get_chunks_of_tabs;
 153
 154     #[test]
 155     fn test_unicode_han_string() {
 156         let res = get_chunks_of_tabs(" \u{4f4d}\t");
 157
 158         assert_eq!(res, vec![(4, 5)]);
 159     }
 160
 161     #[test]
 162     fn test_empty_string() {
 163         let res = get_chunks_of_tabs("");
 164
 165         assert_eq!(res, vec![]);
 166     }
 167
 168     #[test]
 169     fn test_simple() {
 170         let res = get_chunks_of_tabs("sd\t\t\taa");
 171
 172         assert_eq!(res, vec![(2, 5)]);
 173     }
 174
 175     #[test]
 176     fn test_only_t() {
 177         let res = get_chunks_of_tabs("\t\t");
 178
 179         assert_eq!(res, vec![(0, 2)]);
 180     }
 181
 182     #[test]
 183     fn test_only_one_t() {
 184         let res = get_chunks_of_tabs("\t");
 185
 186         assert_eq!(res, vec![(0, 1)]);
 187     }
 188
 189     #[test]
 190     fn test_double() {
 191         let res = get_chunks_of_tabs("sd\tasd\t\taa");
 192
 193         assert_eq!(res, vec![(2, 3), (6, 8)]);
 194     }
 195
 196     #[test]
 197     fn test_start() {
 198         let res = get_chunks_of_tabs("\t\taa");
 199
 200         assert_eq!(res, vec![(0, 2)]);
 201     }
 202
 203     #[test]
 204     fn test_end() {
 205         let res = get_chunks_of_tabs("aa\t\t");
 206
 207         assert_eq!(res, vec![(2, 4)]);
 208     }
 209
 210     #[test]
 211     fn test_start_single() {
 212         let res = get_chunks_of_tabs("\taa");
 213
 214         assert_eq!(res, vec![(0, 1)]);
 215     }
 216
 217     #[test]
 218     fn test_end_single() {
 219         let res = get_chunks_of_tabs("aa\t");
 220
 221         assert_eq!(res, vec![(2, 3)]);
 222     }
 223
 224     #[test]
 225     fn test_no_tabs() {
 226         let res = get_chunks_of_tabs("dsfs");
 227
 228         assert_eq!(res, vec![]);
 229     }
 230 }