1 /// Dealing with string indices can be hard; this struct ensures that both the
2 /// character and byte index are provided for correct indexing.
3 #[derive(Debug, Default, PartialEq, Eq)]
10 pub fn new(char_index: usize, byte_index: usize) -> Self {
11 Self { char_index, byte_index }
15 /// Returns the index of the character after the first camel-case component of `s`.
18 /// # use clippy_utils::str_utils::{camel_case_until, StrIndex};
19 /// assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6));
20 /// assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0));
21 /// assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3));
22 /// assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7));
25 pub fn camel_case_until(s: &str) -> StrIndex {
26 let mut iter = s.char_indices().enumerate();
27 if let Some((_char_index, (_, first))) = iter.next() {
28 if !first.is_uppercase() {
29 return StrIndex::new(0, 0);
32 return StrIndex::new(0, 0);
35 let mut last_index = StrIndex::new(0, 0);
36 for (char_index, (byte_index, c)) in iter {
43 } else if c.is_uppercase() {
45 last_index.byte_index = byte_index;
46 last_index.char_index = char_index;
47 } else if !c.is_lowercase() {
48 return StrIndex::new(char_index, byte_index);
55 StrIndex::new(s.chars().count(), s.len())
59 /// Returns index of the first camel-case component of `s`.
62 /// # use clippy_utils::str_utils::{camel_case_start, StrIndex};
63 /// assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0));
64 /// assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3));
65 /// assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4));
66 /// assert_eq!(camel_case_start("abcd"), StrIndex::new(4, 4));
67 /// assert_eq!(camel_case_start("\u{f6}\u{f6}cd"), StrIndex::new(4, 6));
70 pub fn camel_case_start(s: &str) -> StrIndex {
71 camel_case_start_from_idx(s, 0)
74 /// Returns the `StrIndex` of the earliest camel-case component of `s` that starts at or after byte offset `start_idx`, or the end of `s` if there is none.
77 /// # use clippy_utils::str_utils::{camel_case_start_from_idx, StrIndex};
78 /// assert_eq!(camel_case_start_from_idx("AbcDef", 0), StrIndex::new(0, 0));
79 /// assert_eq!(camel_case_start_from_idx("AbcDef", 1), StrIndex::new(3, 3));
80 /// assert_eq!(camel_case_start_from_idx("AbcDefGhi", 0), StrIndex::new(0, 0));
81 /// assert_eq!(camel_case_start_from_idx("AbcDefGhi", 1), StrIndex::new(3, 3));
82 /// assert_eq!(camel_case_start_from_idx("Abcdefg", 1), StrIndex::new(7, 7));
84 pub fn camel_case_start_from_idx(s: &str, start_idx: usize) -> StrIndex {
85 let char_count = s.chars().count();
86 let range = 0..char_count;
87 let mut iter = range.rev().zip(s.char_indices().rev());
88 if let Some((_, (_, first))) = iter.next() {
89 if !first.is_lowercase() {
90 return StrIndex::new(char_count, s.len());
93 return StrIndex::new(char_count, s.len());
97 let mut last_index = StrIndex::new(char_count, s.len());
98 for (char_index, (byte_index, c)) in iter {
99 if byte_index < start_idx {
103 if c.is_uppercase() {
105 last_index.byte_index = byte_index;
106 last_index.char_index = char_index;
107 } else if !c.is_lowercase() {
110 } else if c.is_lowercase() {
112 } else if c.is_uppercase() {
113 last_index.byte_index = byte_index;
114 last_index.char_index = char_index;
123 /// Get the indexes of camel case components of a string `s`
126 /// # use clippy_utils::str_utils::{camel_case_indices, StrIndex};
128 /// camel_case_indices("AbcDef"),
129 /// vec![StrIndex::new(0, 0), StrIndex::new(3, 3), StrIndex::new(6, 6)]
132 /// camel_case_indices("abcDef"),
133 /// vec![StrIndex::new(3, 3), StrIndex::new(6, 6)]
136 pub fn camel_case_indices(s: &str) -> Vec<StrIndex> {
137 let mut result = Vec::new();
138 let mut str_idx = camel_case_start(s);
140 while str_idx.byte_index < s.len() {
141 let next_idx = str_idx.byte_index + 1;
142 result.push(str_idx);
143 str_idx = camel_case_start_from_idx(s, next_idx);
145 result.push(str_idx);
150 /// Split camel case string into a vector of its components
153 /// # use clippy_utils::str_utils::{camel_case_split, StrIndex};
154 /// assert_eq!(camel_case_split("AbcDef"), vec!["Abc", "Def"]);
156 pub fn camel_case_split(s: &str) -> Vec<&str> {
157 let mut offsets = camel_case_indices(s)
159 .map(|e| e.byte_index)
160 .collect::<Vec<usize>>();
162 offsets.insert(0, 0);
165 offsets.windows(2).map(|w| &s[w[0]..w[1]]).collect()
168 /// Dealing with string comparison can be complicated; this struct ensures that both the
169 /// character and byte count are provided for correct indexing.
170 #[derive(Debug, Default, PartialEq, Eq)]
171 pub struct StrCount {
172 pub char_count: usize,
173 pub byte_count: usize,
177 pub fn new(char_count: usize, byte_count: usize) -> Self {
178 Self { char_count, byte_count }
182 /// Returns the number of chars and bytes that match from the start
185 /// # use clippy_utils::str_utils::{count_match_start, StrCount};
186 /// assert_eq!(count_match_start("hello_mouse", "hello_penguin"), StrCount::new(6, 6));
187 /// assert_eq!(count_match_start("hello_clippy", "bye_bugs"), StrCount::new(0, 0));
188 /// assert_eq!(count_match_start("hello_world", "hello_world"), StrCount::new(11, 11));
189 /// assert_eq!(count_match_start("T\u{f6}ffT\u{f6}ff", "T\u{f6}ff"), StrCount::new(4, 5));
192 pub fn count_match_start(str1: &str, str2: &str) -> StrCount {
193 // (char_index, char1)
194 let char_count = str1.chars().count();
195 let iter1 = (0..=char_count).zip(str1.chars());
196 // (byte_index, char2)
197 let iter2 = str2.char_indices();
201 .take_while(|((_, c1), (_, c2))| c1 == c2)
203 .map_or_else(StrCount::default, |((char_index, _), (byte_index, character))| {
204 StrCount::new(char_index + 1, byte_index + character.len_utf8())
208 /// Returns the number of chars and bytes that match from the end
211 /// # use clippy_utils::str_utils::{count_match_end, StrCount};
212 /// assert_eq!(count_match_end("hello_cat", "bye_cat"), StrCount::new(4, 4));
213 /// assert_eq!(count_match_end("if_item_thing", "enum_value"), StrCount::new(0, 0));
214 /// assert_eq!(count_match_end("Clippy", "Clippy"), StrCount::new(6, 6));
215 /// assert_eq!(count_match_end("MyT\u{f6}ff", "YourT\u{f6}ff"), StrCount::new(4, 5));
218 pub fn count_match_end(str1: &str, str2: &str) -> StrCount {
219 let char_count = str1.chars().count();
221 return StrCount::default();
224 // (char_index, char1)
225 let iter1 = (0..char_count).rev().zip(str1.chars().rev());
226 // (byte_index, char2)
227 let byte_count = str2.len();
228 let iter2 = str2.char_indices().rev();
232 .take_while(|((_, c1), (_, c2))| c1 == c2)
234 .map_or_else(StrCount::default, |((char_index, _), (byte_index, _))| {
235 StrCount::new(char_count - char_index, byte_count - byte_index)
244 fn camel_case_start_full() {
245 assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0));
246 assert_eq!(camel_case_start("Abc"), StrIndex::new(0, 0));
247 assert_eq!(camel_case_start("ABcd"), StrIndex::new(0, 0));
248 assert_eq!(camel_case_start("ABcdEf"), StrIndex::new(0, 0));
249 assert_eq!(camel_case_start("AabABcd"), StrIndex::new(0, 0));
253 fn camel_case_start_partial() {
254 assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3));
255 assert_eq!(camel_case_start("aDbc"), StrIndex::new(1, 1));
256 assert_eq!(camel_case_start("aabABcd"), StrIndex::new(3, 3));
257 assert_eq!(camel_case_start("\u{f6}\u{f6}AabABcd"), StrIndex::new(2, 4));
261 fn camel_case_start_not() {
262 assert_eq!(camel_case_start("AbcDef_"), StrIndex::new(7, 7));
263 assert_eq!(camel_case_start("AbcDD"), StrIndex::new(5, 5));
264 assert_eq!(camel_case_start("all_small"), StrIndex::new(9, 9));
265 assert_eq!(camel_case_start("\u{f6}_all_small"), StrIndex::new(11, 12));
269 fn camel_case_start_caps() {
270 assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4));
274 fn camel_case_until_full() {
275 assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6));
276 assert_eq!(camel_case_until("Abc"), StrIndex::new(3, 3));
277 assert_eq!(camel_case_until("Abc\u{f6}\u{f6}\u{f6}"), StrIndex::new(6, 9));
281 fn camel_case_until_not() {
282 assert_eq!(camel_case_until("abcDef"), StrIndex::new(0, 0));
283 assert_eq!(camel_case_until("aDbc"), StrIndex::new(0, 0));
287 fn camel_case_until_partial() {
288 assert_eq!(camel_case_until("AbcDef_"), StrIndex::new(6, 6));
289 assert_eq!(camel_case_until("CallTypeC"), StrIndex::new(8, 8));
290 assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3));
291 assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7));
296 assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0));
300 fn camel_case_start_from_idx_full() {
301 assert_eq!(camel_case_start_from_idx("AbcDef", 0), StrIndex::new(0, 0));
302 assert_eq!(camel_case_start_from_idx("AbcDef", 1), StrIndex::new(3, 3));
303 assert_eq!(camel_case_start_from_idx("AbcDef", 4), StrIndex::new(6, 6));
304 assert_eq!(camel_case_start_from_idx("AbcDefGhi", 0), StrIndex::new(0, 0));
305 assert_eq!(camel_case_start_from_idx("AbcDefGhi", 1), StrIndex::new(3, 3));
306 assert_eq!(camel_case_start_from_idx("Abcdefg", 1), StrIndex::new(7, 7));
310 fn camel_case_indices_full() {
311 assert_eq!(camel_case_indices("Abc\u{f6}\u{f6}DD"), vec![StrIndex::new(7, 9)]);
315 fn camel_case_split_full() {
316 assert_eq!(camel_case_split("A"), vec!["A"]);
317 assert_eq!(camel_case_split("AbcDef"), vec!["Abc", "Def"]);
318 assert_eq!(camel_case_split("Abc"), vec!["Abc"]);
319 assert_eq!(camel_case_split("abcDef"), vec!["abc", "Def"]);
321 camel_case_split("\u{f6}\u{f6}AabABcd"),
322 vec!["\u{f6}\u{f6}", "Aab", "A", "Bcd"]