Auto merge of #22466 - Kimundi:str_pattern_ai_safe, r=aturon

This is not a complete implementation of the RFC:

- only existing methods got updated, no new ones added
- doc comments are not extensive enough yet
- optimizations got lost and need to be reimplemented

See https://github.com/rust-lang/rfcs/pull/528

Technically a

[breaking-change]
This commit is contained in:
bors
2015-02-22 22:45:46 +00:00
20 changed files with 1076 additions and 350 deletions

View File

@@ -1,4 +1,4 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
@@ -8,6 +8,23 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[test]
fn test_pattern_deref_forward() {
let data = "aabcdaa";
assert!(data.contains("bcd"));
assert!(data.contains(&"bcd"));
assert!(data.contains(&&"bcd"));
assert!(data.contains(&"bcd".to_string()));
assert!(data.contains(&&"bcd".to_string()));
}
#[test]
fn test_empty_match_indices() {
let data = "aä中!";
let vec: Vec<_> = data.match_indices("").collect();
assert_eq!(vec, vec![(0, 0), (1, 1), (3, 3), (6, 6), (7, 7)]);
}
#[test]
fn test_bool_from_str() {
assert_eq!("true".parse().ok(), Some(true));
@@ -121,3 +138,252 @@ fn test_utf16_code_units() {
assert_eq!(Utf16Encoder::new(vec!['é', '\u{1F4A9}'].into_iter()).collect::<Vec<u16>>(),
vec![0xE9, 0xD83D, 0xDCA9])
}
#[test]
fn starts_with_in_unicode() {
assert!(!"├── Cargo.toml".starts_with("# "));
}
#[test]
fn starts_short_long() {
assert!(!"".starts_with("##"));
assert!(!"##".starts_with("####"));
assert!("####".starts_with("##"));
assert!(!"##ä".starts_with("####"));
assert!("####ä".starts_with("##"));
assert!(!"##".starts_with("####ä"));
assert!("##ä##".starts_with("##ä"));
assert!("".starts_with(""));
assert!("ä".starts_with(""));
assert!("".starts_with(""));
assert!("##ä".starts_with(""));
assert!("ä###".starts_with(""));
assert!("#ä##".starts_with(""));
assert!("##ä#".starts_with(""));
}
#[test]
fn contains_weird_cases() {
assert!("* \t".contains_char(' '));
assert!(!"* \t".contains_char('?'));
assert!(!"* \t".contains_char('\u{1F4A9}'));
}
#[test]
fn trim_ws() {
assert_eq!(" \t a \t ".trim_left_matches(|c: char| c.is_whitespace()),
"a \t ");
assert_eq!(" \t a \t ".trim_right_matches(|c: char| c.is_whitespace()),
" \t a");
assert_eq!(" \t a \t ".trim_matches(|c: char| c.is_whitespace()),
"a");
assert_eq!(" \t \t ".trim_left_matches(|c: char| c.is_whitespace()),
"");
assert_eq!(" \t \t ".trim_right_matches(|c: char| c.is_whitespace()),
"");
assert_eq!(" \t \t ".trim_matches(|c: char| c.is_whitespace()),
"");
}
mod pattern {
use std::str::Pattern;
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use std::str::SearchStep::{self, Match, Reject, Done};
macro_rules! make_test {
($name:ident, $p:expr, $h:expr, [$($e:expr,)*]) => {
mod $name {
use std::str::Pattern;
use std::str::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use std::str::SearchStep::{self, Match, Reject, Done};
use super::{cmp_search_to_vec};
#[test]
fn fwd() {
cmp_search_to_vec(false, $p, $h, vec![$($e),*]);
}
#[test]
fn bwd() {
cmp_search_to_vec(true, $p, $h, vec![$($e),*]);
}
}
}
}
fn cmp_search_to_vec<'a, P: Pattern<'a>>(rev: bool, pat: P, haystack: &'a str,
right: Vec<SearchStep>)
where P::Searcher: ReverseSearcher<'a>
{
let mut searcher = pat.into_searcher(haystack);
let mut v = vec![];
loop {
match if !rev {searcher.next()} else {searcher.next_back()} {
Match(a, b) => v.push(Match(a, b)),
Reject(a, b) => v.push(Reject(a, b)),
Done => break,
}
}
if rev {
v.reverse();
}
assert_eq!(v, right);
}
make_test!(str_searcher_ascii_haystack, "bb", "abbcbbd", [
Reject(0, 1),
Match (1, 3),
Reject(3, 4),
Match (4, 6),
Reject(6, 7),
]);
make_test!(str_searcher_empty_needle_ascii_haystack, "", "abbcbbd", [
Match(0, 0),
Match(1, 1),
Match(2, 2),
Match(3, 3),
Match(4, 4),
Match(5, 5),
Match(6, 6),
Match(7, 7),
]);
make_test!(str_searcher_mulibyte_haystack, " ", "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(str_searcher_empty_needle_mulibyte_haystack, "", "├──", [
Match(0, 0),
Match(3, 3),
Match(6, 6),
Match(9, 9),
]);
make_test!(str_searcher_empty_needle_empty_haystack, "", "", [
Match(0, 0),
]);
make_test!(str_searcher_nonempty_needle_empty_haystack, "", "", [
]);
make_test!(char_searcher_ascii_haystack, 'b', "abbcbbd", [
Reject(0, 1),
Match (1, 2),
Match (2, 3),
Reject(3, 4),
Match (4, 5),
Match (5, 6),
Reject(6, 7),
]);
make_test!(char_searcher_mulibyte_haystack, ' ', "├──", [
Reject(0, 3),
Reject(3, 6),
Reject(6, 9),
]);
make_test!(char_searcher_short_haystack, '\u{1F4A9}', "* \t", [
Reject(0, 1),
Reject(1, 2),
Reject(2, 3),
]);
}
mod bench {
macro_rules! make_test_inner {
($s:ident, $code:expr, $name:ident, $str:expr) => {
#[bench]
fn $name(bencher: &mut Bencher) {
let mut $s = $str;
black_box(&mut $s);
bencher.iter(|| $code);
}
}
}
macro_rules! make_test {
($name:ident, $s:ident, $code:expr) => {
mod $name {
use test::Bencher;
use test::black_box;
// Short strings: 65 bytes each
make_test_inner!($s, $code, short_ascii,
"Mary had a little lamb, Little lamb Mary had a littl lamb, lamb!");
make_test_inner!($s, $code, short_mixed,
"ศไทย中华Việt Nam; Mary had a little lamb, Little lam!");
make_test_inner!($s, $code, short_pile_of_poo,
"💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩💩!");
make_test_inner!($s, $code, long_lorem_ipsum,"\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.
In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.
Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.
Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.
Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum!");
}
}
}
make_test!(chars_count, s, s.chars().count());
make_test!(contains_bang_str, s, s.contains("!"));
make_test!(contains_bang_char, s, s.contains_char('!'));
make_test!(match_indices_a_str, s, s.match_indices("a").count());
make_test!(split_str_a_str, s, s.split_str("a").count());
make_test!(trim_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_matches(|c: char| c.is_ascii())
});
make_test!(trim_left_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_left_matches(|c: char| c.is_ascii())
});
make_test!(trim_right_ascii_char, s, {
use std::ascii::AsciiExt;
s.trim_right_matches(|c: char| c.is_ascii())
});
make_test!(find_underscore_char, s, s.find('_'));
make_test!(rfind_underscore_char, s, s.rfind('_'));
make_test!(find_underscore_str, s, s.find_str("_"));
make_test!(find_zzz_char, s, s.find('\u{1F4A4}'));
make_test!(rfind_zzz_char, s, s.rfind('\u{1F4A4}'));
make_test!(find_zzz_str, s, s.find_str("\u{1F4A4}"));
make_test!(split_space_char, s, s.split(' ').count());
make_test!(split_terminator_space_char, s, s.split_terminator(' ').count());
make_test!(splitn_space_char, s, s.splitn(10, ' ').count());
make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());
make_test!(split_str_space_str, s, s.split_str(" ").count());
make_test!(split_str_ad_str, s, s.split_str("ad").count());
}