Auto merge of #7873 - xFrednet:7869-string-index-ice, r=Manishearth

Update `str` utils to prevent ICEs and FNs This PR reworks some string handling for lints regarding enum naming. I hope the refactoring will prevent future ICEs and help with new bug free implementations. It might be better to review this PR by going through the commits, as `clippy_utils::camel_case` was renamed to `clippy_utils::str_utils` and then changed further. GH sadly doesn't really make the changes that obvious 🙃 Not too much more to say. Have a nice day 🌞 --- Fixes: rust-lang/rust-clippy#7869 changelog: ICE Fix: [`enum_variant_names`] #7869
2021-10-25 19:20:05 +00:00
parent 4f46f4266a 7517ae2fb3
commit 3ef1f19d60
8 changed files with 275 additions and 158 deletions
--- a/clippy_utils/src/camel_case.rs
+++ b/clippy_utils/src/camel_case.rs
@@ -1,117 +0,0 @@
-/// Returns the index of the character after the first camel-case component of `s`.
-#[must_use]
-pub fn until(s: &str) -> usize {
-    let mut iter = s.char_indices();
-    if let Some((_, first)) = iter.next() {
-        if !first.is_uppercase() {
-            return 0;
-        }
-    } else {
-        return 0;
-    }
-    let mut up = true;
-    let mut last_i = 0;
-    for (i, c) in iter {
-        if up {
-            if c.is_lowercase() {
-                up = false;
-            } else {
-                return last_i;
-            }
-        } else if c.is_uppercase() {
-            up = true;
-            last_i = i;
-        } else if !c.is_lowercase() {
-            return i;
-        }
-    }
-    if up { last_i } else { s.len() }
-}
-
-/// Returns index of the last camel-case component of `s`.
-#[must_use]
-pub fn from(s: &str) -> usize {
-    let mut iter = s.char_indices().rev();
-    if let Some((_, first)) = iter.next() {
-        if !first.is_lowercase() {
-            return s.len();
-        }
-    } else {
-        return s.len();
-    }
-    let mut down = true;
-    let mut last_i = s.len();
-    for (i, c) in iter {
-        if down {
-            if c.is_uppercase() {
-                down = false;
-                last_i = i;
-            } else if !c.is_lowercase() {
-                return last_i;
-            }
-        } else if c.is_lowercase() {
-            down = true;
-        } else if c.is_uppercase() {
-            last_i = i;
-        } else {
-            return last_i;
-        }
-    }
-    last_i
-}
-
-#[cfg(test)]
-mod test {
-    use super::{from, until};
-
-    #[test]
-    fn from_full() {
-        assert_eq!(from("AbcDef"), 0);
-        assert_eq!(from("Abc"), 0);
-        assert_eq!(from("ABcd"), 0);
-        assert_eq!(from("ABcdEf"), 0);
-        assert_eq!(from("AabABcd"), 0);
-    }
-
-    #[test]
-    fn from_partial() {
-        assert_eq!(from("abcDef"), 3);
-        assert_eq!(from("aDbc"), 1);
-        assert_eq!(from("aabABcd"), 3);
-    }
-
-    #[test]
-    fn from_not() {
-        assert_eq!(from("AbcDef_"), 7);
-        assert_eq!(from("AbcDD"), 5);
-    }
-
-    #[test]
-    fn from_caps() {
-        assert_eq!(from("ABCD"), 4);
-    }
-
-    #[test]
-    fn until_full() {
-        assert_eq!(until("AbcDef"), 6);
-        assert_eq!(until("Abc"), 3);
-    }
-
-    #[test]
-    fn until_not() {
-        assert_eq!(until("abcDef"), 0);
-        assert_eq!(until("aDbc"), 0);
-    }
-
-    #[test]
-    fn until_partial() {
-        assert_eq!(until("AbcDef_"), 6);
-        assert_eq!(until("CallTypeC"), 8);
-        assert_eq!(until("AbcDD"), 3);
-    }
-
-    #[test]
-    fn until_caps() {
-        assert_eq!(until("ABCD"), 0);
-    }
-}
--- a/clippy_utils/src/lib.rs
+++ b/clippy_utils/src/lib.rs
@@ -37,7 +37,6 @@ pub mod sym_helper;
 #[allow(clippy::module_name_repetitions)]
 pub mod ast_utils;
 pub mod attrs;
-pub mod camel_case;
 pub mod comparisons;
 pub mod consts;
 pub mod diagnostics;
@@ -50,6 +49,7 @@ pub mod paths;
 pub mod ptr;
 pub mod qualify_min_const_fn;
 pub mod source;
+pub mod str_utils;
 pub mod sugg;
 pub mod ty;
 pub mod usage;
--- a/clippy_utils/src/str_utils.rs
+++ b/clippy_utils/src/str_utils.rs
@@ -0,0 +1,230 @@
+/// Dealing with sting indices can be hard, this struct ensures that both the
+/// character and byte index are provided for correct indexing.
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct StrIndex {
+    pub char_index: usize,
+    pub byte_index: usize,
+}
+
+impl StrIndex {
+    pub fn new(char_index: usize, byte_index: usize) -> Self {
+        Self { char_index, byte_index }
+    }
+}
+
+/// Returns the index of the character after the first camel-case component of `s`.
+///
+/// ```
+/// assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6));
+/// assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0));
+/// assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3));
+/// assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7));
+/// ```
+#[must_use]
+pub fn camel_case_until(s: &str) -> StrIndex {
+    let mut iter = s.char_indices().enumerate();
+    if let Some((_char_index, (_, first))) = iter.next() {
+        if !first.is_uppercase() {
+            return StrIndex::new(0, 0);
+        }
+    } else {
+        return StrIndex::new(0, 0);
+    }
+    let mut up = true;
+    let mut last_index = StrIndex::new(0, 0);
+    for (char_index, (byte_index, c)) in iter {
+        if up {
+            if c.is_lowercase() {
+                up = false;
+            } else {
+                return last_index;
+            }
+        } else if c.is_uppercase() {
+            up = true;
+            last_index.byte_index = byte_index;
+            last_index.char_index = char_index;
+        } else if !c.is_lowercase() {
+            return StrIndex::new(char_index, byte_index);
+        }
+    }
+
+    if up {
+        last_index
+    } else {
+        StrIndex::new(s.chars().count(), s.len())
+    }
+}
+
+/// Returns index of the last camel-case component of `s`.
+///
+/// ```
+/// assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0));
+/// assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3));
+/// assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4));
+/// assert_eq!(camel_case_start("abcd"), StrIndex::new(4, 4));
+/// assert_eq!(camel_case_start("\u{f6}\u{f6}cd"), StrIndex::new(4, 6));
+/// ```
+#[must_use]
+pub fn camel_case_start(s: &str) -> StrIndex {
+    let char_count = s.chars().count();
+    let range = 0..char_count;
+    let mut iter = range.rev().zip(s.char_indices().rev());
+    if let Some((char_index, (_, first))) = iter.next() {
+        if !first.is_lowercase() {
+            return StrIndex::new(char_index, s.len());
+        }
+    } else {
+        return StrIndex::new(char_count, s.len());
+    }
+    let mut down = true;
+    let mut last_index = StrIndex::new(char_count, s.len());
+    for (char_index, (byte_index, c)) in iter {
+        if down {
+            if c.is_uppercase() {
+                down = false;
+                last_index.byte_index = byte_index;
+                last_index.char_index = char_index;
+            } else if !c.is_lowercase() {
+                return last_index;
+            }
+        } else if c.is_lowercase() {
+            down = true;
+        } else if c.is_uppercase() {
+            last_index.byte_index = byte_index;
+            last_index.char_index = char_index;
+        } else {
+            return last_index;
+        }
+    }
+    last_index
+}
+
+/// Dealing with sting comparison can be complicated, this struct ensures that both the
+/// character and byte count are provided for correct indexing.
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct StrCount {
+    pub char_count: usize,
+    pub byte_count: usize,
+}
+
+impl StrCount {
+    pub fn new(char_count: usize, byte_count: usize) -> Self {
+        Self { char_count, byte_count }
+    }
+}
+
+/// Returns the number of chars that match from the start
+///
+/// ```
+/// assert_eq!(count_match_start("hello_mouse", "hello_penguin"), StrCount::new(6, 6));
+/// assert_eq!(count_match_start("hello_clippy", "bye_bugs"), StrCount::new(0, 0));
+/// assert_eq!(count_match_start("hello_world", "hello_world"), StrCount::new(11, 11));
+/// assert_eq!(count_match_start("T\u{f6}ffT\u{f6}ff", "T\u{f6}ff"), StrCount::new(4, 5));
+/// ```
+#[must_use]
+pub fn count_match_start(str1: &str, str2: &str) -> StrCount {
+    // (char_index, char1)
+    let char_count = str1.chars().count();
+    let iter1 = (0..=char_count).zip(str1.chars());
+    // (byte_index, char2)
+    let iter2 = str2.char_indices();
+
+    iter1
+        .zip(iter2)
+        .take_while(|((_, c1), (_, c2))| c1 == c2)
+        .last()
+        .map_or_else(StrCount::default, |((char_index, _), (byte_index, character))| {
+            StrCount::new(char_index + 1, byte_index + character.len_utf8())
+        })
+}
+
+/// Returns the number of chars and bytes that match from the end
+///
+/// ```
+/// assert_eq!(count_match_end("hello_cat", "bye_cat"), StrCount::new(4, 4));
+/// assert_eq!(count_match_end("if_item_thing", "enum_value"), StrCount::new(0, 0));
+/// assert_eq!(count_match_end("Clippy", "Clippy"), StrCount::new(6, 6));
+/// assert_eq!(count_match_end("MyT\u{f6}ff", "YourT\u{f6}ff"), StrCount::new(4, 5));
+/// ```
+#[must_use]
+pub fn count_match_end(str1: &str, str2: &str) -> StrCount {
+    let char_count = str1.chars().count();
+    if char_count == 0 {
+        return StrCount::default();
+    }
+
+    // (char_index, char1)
+    let iter1 = (0..char_count).rev().zip(str1.chars().rev());
+    // (byte_index, char2)
+    let byte_count = str2.len();
+    let iter2 = str2.char_indices().rev();
+
+    iter1
+        .zip(iter2)
+        .take_while(|((_, c1), (_, c2))| c1 == c2)
+        .last()
+        .map_or_else(StrCount::default, |((char_index, _), (byte_index, _))| {
+            StrCount::new(char_count - char_index, byte_count - byte_index)
+        })
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn camel_case_start_full() {
+        assert_eq!(camel_case_start("AbcDef"), StrIndex::new(0, 0));
+        assert_eq!(camel_case_start("Abc"), StrIndex::new(0, 0));
+        assert_eq!(camel_case_start("ABcd"), StrIndex::new(0, 0));
+        assert_eq!(camel_case_start("ABcdEf"), StrIndex::new(0, 0));
+        assert_eq!(camel_case_start("AabABcd"), StrIndex::new(0, 0));
+    }
+
+    #[test]
+    fn camel_case_start_partial() {
+        assert_eq!(camel_case_start("abcDef"), StrIndex::new(3, 3));
+        assert_eq!(camel_case_start("aDbc"), StrIndex::new(1, 1));
+        assert_eq!(camel_case_start("aabABcd"), StrIndex::new(3, 3));
+        assert_eq!(camel_case_start("\u{f6}\u{f6}AabABcd"), StrIndex::new(2, 4));
+    }
+
+    #[test]
+    fn camel_case_start_not() {
+        assert_eq!(camel_case_start("AbcDef_"), StrIndex::new(7, 7));
+        assert_eq!(camel_case_start("AbcDD"), StrIndex::new(5, 5));
+        assert_eq!(camel_case_start("all_small"), StrIndex::new(9, 9));
+        assert_eq!(camel_case_start("\u{f6}_all_small"), StrIndex::new(11, 12));
+    }
+
+    #[test]
+    fn camel_case_start_caps() {
+        assert_eq!(camel_case_start("ABCD"), StrIndex::new(4, 4));
+    }
+
+    #[test]
+    fn camel_case_until_full() {
+        assert_eq!(camel_case_until("AbcDef"), StrIndex::new(6, 6));
+        assert_eq!(camel_case_until("Abc"), StrIndex::new(3, 3));
+        assert_eq!(camel_case_until("Abc\u{f6}\u{f6}\u{f6}"), StrIndex::new(6, 9));
+    }
+
+    #[test]
+    fn camel_case_until_not() {
+        assert_eq!(camel_case_until("abcDef"), StrIndex::new(0, 0));
+        assert_eq!(camel_case_until("aDbc"), StrIndex::new(0, 0));
+    }
+
+    #[test]
+    fn camel_case_until_partial() {
+        assert_eq!(camel_case_until("AbcDef_"), StrIndex::new(6, 6));
+        assert_eq!(camel_case_until("CallTypeC"), StrIndex::new(8, 8));
+        assert_eq!(camel_case_until("AbcDD"), StrIndex::new(3, 3));
+        assert_eq!(camel_case_until("Abc\u{f6}\u{f6}DD"), StrIndex::new(5, 7));
+    }
+
+    #[test]
+    fn until_caps() {
+        assert_eq!(camel_case_until("ABCD"), StrIndex::new(0, 0));
+    }
+}