Shrink `Token`.

[rust.git] / clippy_lints / src / tabs_in_doc_comments.rs
diff --git a/clippy_lints/src/tabs_in_doc_comments.rs b/clippy_lints/src/tabs_in_doc_comments.rs

index 41e3f9918cc9b05d79db88272750c9ac2c3a1a09..e223aea297fc478bf9816be4ec8f4e73a65b6391 100644 (file)
--- a/clippy_lints/src/tabs_in_doc_comments.rs
+++ b/clippy_lints/src/tabs_in_doc_comments.rs
@@ -1,22 +1,21 @@
-use crate::utils::span_lint_and_sugg;
+use clippy_utils::diagnostics::span_lint_and_sugg;
+use rustc_ast::ast;
  use rustc_errors::Applicability;
  use rustc_lint::{EarlyContext, EarlyLintPass};
  use rustc_session::{declare_lint_pass, declare_tool_lint};
  use rustc_span::source_map::{BytePos, Span};
-use std::convert::TryFrom;
-use syntax::ast;
  
  declare_clippy_lint! {
-    /// **What it does:** Checks doc comments for usage of tab characters.
+    /// ### What it does
+    /// Checks doc comments for usage of tab characters.
      ///
-    /// **Why is this bad?** The rust style-guide promotes spaces instead of tabs for indentation.
+    /// ### Why is this bad?
+    /// The Rust style guide promotes spaces instead of tabs for indentation.
      /// To keep a consistent view on the source, also doc comments should not have tabs.
      /// Also, explaining ascii-diagrams containing tabs can get displayed incorrectly when the
      /// display settings of the author and reader differ.
      ///
-    /// **Known problems:** None.
-    ///
-    /// **Example:**
+    /// ### Example
      /// ```rust
      /// ///
      /// /// Struct to hold two strings:
@@ -51,6 +50,7 @@
      ///    second_string: String,
      ///}
      /// ```
+    #[clippy::version = "1.41.0"]
      pub TABS_IN_DOC_COMMENTS,
      style,
      "using tabs in doc comments is not recommended"
@@ -60,14 +60,16 @@
  
  impl TabsInDocComments {
      fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
-        if let ast::AttrKind::DocComment(comment) = attr.kind {
+        if let ast::AttrKind::DocComment(_, comment) = attr.kind {
              let comment = comment.as_str();
  
-            for (lo, hi) in get_chunks_of_tabs(&comment) {
+            for (lo, hi) in get_chunks_of_tabs(comment) {
+                // `comment` excludes the 3-byte opening delimiter (e.g. `///`), so add 3 to skip it
                  let new_span = Span::new(
-                    attr.span.lo() + BytePos(lo),
-                    attr.span.lo() + BytePos(hi),
+                    attr.span.lo() + BytePos(3 + lo),
+                    attr.span.lo() + BytePos(3 + hi),
                      attr.span.ctxt(),
+                    attr.span.parent(),
                  );
                  span_lint_and_sugg(
                      cx,
@@ -85,7 +87,7 @@ fn warn_if_tabs_in_doc(cx: &EarlyContext<'_>, attr: &ast::Attribute) {
  
  impl EarlyLintPass for TabsInDocComments {
      fn check_attribute(&mut self, cx: &EarlyContext<'_>, attribute: &ast::Attribute) {
-        Self::warn_if_tabs_in_doc(cx, &attribute);
+        Self::warn_if_tabs_in_doc(cx, attribute);
      }
  }
  
@@ -103,30 +105,32 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
      // tracker to decide if the last group of tabs is not closed by a non-tab character
      let mut is_active = false;
  
-    let chars_array: Vec<_> = the_str.chars().collect();
+    // Note that we specifically need the char _byte_ indices here, not the positional indices
+    // within the char array, to deal with multi-byte characters properly. `char_indices` does
+    // exactly that. It provides an iterator over tuples of the form `(byte position, char)`.
+    let char_indices: Vec<_> = the_str.char_indices().collect();
  
-    if chars_array == vec!['\t'] {
+    if let [(_, '\t')] = char_indices.as_slice() {
          return vec![(0, 1)];
      }
  
-    for (index, arr) in chars_array.windows(2).enumerate() {
-        let index = u32::try_from(index).expect(line_length_way_to_long);
-        match arr {
-            ['\t', '\t'] => {
+    for entry in char_indices.windows(2) {
+        match entry {
+            [(_, '\t'), (_, '\t')] => {
                  // either string starts with double tab, then we have to set it active,
                  // otherwise is_active is true anyway
                  is_active = true;
              },
-            [_, '\t'] => {
+            [(_, _), (index_b, '\t')] => {
                  // as ['\t', '\t'] is excluded, this has to be a start of a tab group,
                  // set indices accordingly
                  is_active = true;
-                current_start = index + 1;
+                current_start = u32::try_from(*index_b).unwrap();
              },
-            ['\t', _] => {
+            [(_, '\t'), (index_b, _)] => {
                  // this now has to be an end of the group, hence we have to push a new tuple
                  is_active = false;
-                spans.push((current_start, index + 1));
+                spans.push((current_start, u32::try_from(*index_b).unwrap()));
              },
              _ => {},
          }
@@ -136,7 +140,7 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
      if is_active {
          spans.push((
              current_start,
-            u32::try_from(the_str.chars().count()).expect(line_length_way_to_long),
+            u32::try_from(char_indices.last().unwrap().0 + 1).expect(line_length_way_to_long),
          ));
      }
  
@@ -147,6 +151,13 @@ fn get_chunks_of_tabs(the_str: &str) -> Vec<(u32, u32)> {
  mod tests_for_get_chunks_of_tabs {
      use super::get_chunks_of_tabs;
  
+    #[test]
+    fn test_unicode_han_string() {
+        let res = get_chunks_of_tabs(" \u{4f4d}\t");
+
+        assert_eq!(res, vec![(4, 5)]);
+    }
+
      #[test]
      fn test_empty_string() {
          let res = get_chunks_of_tabs("");