std: replace str::each_split* with an iterator

This commit is contained in:
Huon Wilson
2013-06-09 23:10:50 +10:00
parent 2ff6b298c5
commit 1e8982bdb2
19 changed files with 270 additions and 516 deletions

View File

@@ -25,7 +25,7 @@ use clone::Clone;
use cmp::{TotalOrd, Ordering, Less, Equal, Greater};
use container::Container;
use iter::Times;
use iterator::{Iterator, IteratorUtil};
use iterator::{Iterator, IteratorUtil, FilterIterator};
use libc;
use option::{None, Option, Some};
use old_iter::{BaseIter, EqIter};
@@ -633,128 +633,92 @@ pub fn slice<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
unsafe { raw::slice_bytes(s, begin, end) }
}
/// Splits a string into substrings at each occurrence of a given character
pub fn each_split_char<'a>(s: &'a str, sep: char,
it: &fn(&'a str) -> bool) -> bool {
each_split_char_inner(s, sep, len(s), true, true, it)
/// An iterator over the substrings of a string, separated by `sep`.
///
/// This is the type returned by `split_iter`, `splitn_iter`,
/// `split_options_iter` and `line_iter`.
pub struct StrCharSplitIterator<'self,Sep> {
/// The string being split
priv string: &'self str,
/// Byte offset into `string` at which the next piece starts
priv position: uint,
/// The separator, asked via `should_split` whether to split at a character
priv sep: Sep,
/// The number of splits remaining
priv count: uint,
/// Whether an empty string at the end is allowed
priv allow_trailing_empty: bool,
/// Set once the final piece has been handled; `next` then returns `None`
priv finished: bool,
/// Cached result of `sep.only_ascii()`; selects the byte-wise scanning loop
priv only_ascii: bool
}
/// Like `each_split_char`, but a trailing empty string is omitted
pub fn each_split_char_no_trailing<'a>(s: &'a str,
sep: char,
it: &fn(&'a str) -> bool) -> bool {
each_split_char_inner(s, sep, len(s), true, false, it)
/// An iterator over the words of a string, separated by a sequence of whitespace.
///
/// It is a filter over a split on a `fn(char) -> bool` separator; this is the
/// type returned by `word_iter`.
pub type WordIterator<'self> =
FilterIterator<'self, &'self str,
StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
/// A separator for splitting a string character-wise
///
/// Implemented in this file for `char` and for `fn(char) -> bool` predicates.
pub trait StrCharSplitSeparator {
/// Determine if the splitter should split at the given character
fn should_split(&self, char) -> bool;
/// Indicate if the splitter only uses ASCII characters, which
/// allows for a faster implementation.
///
/// When this returns true the split iterator scans the string byte by byte
/// and hands each byte to `should_split` as a `char`.
fn only_ascii(&self) -> bool;
}
/// A single character used as a separator: split exactly where it occurs.
impl StrCharSplitSeparator for char {
    /// True when `c` is this separator character.
    #[inline(always)]
    fn should_split(&self, c: char) -> bool {
        let separator = *self;
        c == separator
    }

    /// True when this separator's code point is below 128.
    fn only_ascii(&self) -> bool {
        let code_point = *self as uint;
        code_point < 128
    }
}
/// A `&fn(char) -> bool` closure used as a separator: split wherever it
/// returns true.
impl<'self> StrCharSplitSeparator for &'self fn(char) -> bool {
#[inline(always)]
fn should_split(&self, c: char) -> bool { (*self)(c) }
// Always false, so the byte-wise fast path is never used for closures.
fn only_ascii(&self) -> bool { false }
}
/// A plain `fn(char) -> bool` function (such as `char::is_whitespace`) used
/// as a separator: split wherever it returns true.
impl<'self> StrCharSplitSeparator for extern "Rust" fn(char) -> bool {
#[inline(always)]
fn should_split(&self, c: char) -> bool { (*self)(c) }
// Always false, so the byte-wise fast path is never used for functions.
fn only_ascii(&self) -> bool { false }
}
/**
* Splits a string into substrings at each occurrence of a given
* character up to 'count' times.
*
* The character must be a valid UTF-8/ASCII character
*
* Each substring is passed to `it`. Empty substrings, including a trailing
* one, are produced: `each_split_char_inner` is called with both
* `allow_empty` and `allow_trailing_empty` set to `true`.
*
* Returns whatever `each_split_char_inner` returns, which is `false` when
* `it` returned `false`.
*/
pub fn each_splitn_char<'a>(s: &'a str,
sep: char,
count: uint,
it: &fn(&'a str) -> bool) -> bool {
each_split_char_inner(s, sep, count, true, true, it)
}
impl<'self, Sep: StrCharSplitSeparator> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
fn next(&mut self) -> Option<&'self str> {
if self.finished { return None }
/// Like `each_split_char`, but omits empty strings
///
/// Forwards to `each_split_char_inner` with a split limit of `len(s)` and
/// with both `allow_empty` and `allow_trailing_empty` set to `false`.
pub fn each_split_char_nonempty<'a>(s: &'a str,
sep: char,
it: &fn(&'a str) -> bool) -> bool {
each_split_char_inner(s, sep, len(s), false, false, it)
}
let l = self.string.len();
let start = self.position;
fn each_split_char_inner<'a>(s: &'a str,
sep: char,
count: uint,
allow_empty: bool,
allow_trailing_empty: bool,
it: &fn(&'a str) -> bool) -> bool {
if sep < 128u as char {
let (b, l) = (sep as u8, len(s));
let mut done = 0u;
let mut (i, start) = (0u, 0u);
while i < l && done < count {
if s[i] == b {
if allow_empty || start < i {
if !it( unsafe{ raw::slice_bytes(s, start, i) } ) {
return false;
}
if self.only_ascii {
// this gives a *huge* speed up for splitting on ASCII
// characters (e.g. '\n' or ' ')
while self.position < l && self.count > 0 {
let byte = self.string[self.position];
if self.sep.should_split(byte as char) {
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
self.position += 1;
self.count -= 1;
return Some(slice);
}
start = i + 1u;
done += 1u;
self.position += 1;
}
i += 1u;
}
// only slice a non-empty trailing substring
if allow_trailing_empty || start < l {
if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; }
}
return true;
}
return each_split_inner(s, |cur| cur == sep, count,
allow_empty, allow_trailing_empty, it)
}
} else {
while self.position < l && self.count > 0 {
let CharRange {ch, next} = char_range_at(self.string, self.position);
/// Splits a string into substrings using a character function
///
/// A split happens at every character for which `sepfn` returns true; each
/// substring is passed to `it`. Forwards to `each_split_inner` with a split
/// limit of `len(s)` and with empty and trailing-empty substrings allowed.
pub fn each_split<'a>(s: &'a str,
sepfn: &fn(char) -> bool,
it: &fn(&'a str) -> bool) -> bool {
each_split_inner(s, sepfn, len(s), true, true, it)
}
/// Like `each_split`, but a trailing empty string is omitted
///
/// Forwards to `each_split_inner` with `allow_empty` set to `true` and
/// `allow_trailing_empty` set to `false`.
pub fn each_split_no_trailing<'a>(s: &'a str,
sepfn: &fn(char) -> bool,
it: &fn(&'a str) -> bool) -> bool {
each_split_inner(s, sepfn, len(s), true, false, it)
}
/**
* Splits a string into substrings using a character function, cutting at
* most `count` times.
*
* Each substring is passed to `it`. Empty substrings, including a trailing
* one, are produced: `each_split_inner` is called with both `allow_empty`
* and `allow_trailing_empty` set to `true`.
*/
pub fn each_splitn<'a>(s: &'a str,
sepfn: &fn(char) -> bool,
count: uint,
it: &fn(&'a str) -> bool) -> bool {
each_split_inner(s, sepfn, count, true, true, it)
}
/// Like `each_split`, but omits empty strings
///
/// Forwards to `each_split_inner` with a split limit of `len(s)` and with
/// both `allow_empty` and `allow_trailing_empty` set to `false`.
pub fn each_split_nonempty<'a>(s: &'a str,
sepfn: &fn(char) -> bool,
it: &fn(&'a str) -> bool) -> bool {
each_split_inner(s, sepfn, len(s), false, false, it)
}
fn each_split_inner<'a>(s: &'a str,
sepfn: &fn(cc: char) -> bool,
count: uint,
allow_empty: bool,
allow_trailing_empty: bool,
it: &fn(&'a str) -> bool) -> bool {
let l = len(s);
let mut (i, start, done) = (0u, 0u, 0u);
while i < l && done < count {
let CharRange {ch, next} = char_range_at(s, i);
if sepfn(ch) {
if allow_empty || start < i {
if !it( unsafe{ raw::slice_bytes(s, start, i) } ) {
return false;
if self.sep.should_split(ch) {
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
self.position = next;
self.count -= 1;
return Some(slice);
}
self.position = next;
}
start = next;
done += 1u;
}
i = next;
self.finished = true;
if self.allow_trailing_empty || start < l {
Some(unsafe { raw::slice_bytes(self.string, start, l) })
} else {
None
}
}
if allow_trailing_empty || start < l {
if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; }
}
return true;
}
// See Issue #1932 for why this is a naive search
@@ -875,19 +839,12 @@ pub fn levdistance(s: &str, t: &str) -> uint {
return dcol[tlen];
}
/**
* Splits a string into substrings separated by LF ('\n').
*
* Each line is passed to `it`. This forwards to
* `each_split_char_no_trailing`, so a trailing empty substring is omitted.
*/
pub fn each_line<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
each_split_char_no_trailing(s, '\n', it)
}
/**
* Splits a string into substrings separated by LF ('\n')
* and/or CR LF ("\r\n")
*/
pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
for each_line(s) |s| {
for s.line_iter().advance |s| {
let l = s.len();
if l > 0u && s[l - 1u] == '\r' as u8 {
if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return false; }
@@ -898,11 +855,6 @@ pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
return true;
}
/// Splits a string into substrings separated by whitespace
///
/// Each word is passed to `it`. This forwards to `each_split_nonempty` with
/// `char::is_whitespace` as the separator function, so empty substrings are
/// omitted.
pub fn each_word<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
each_split_nonempty(s, char::is_whitespace, it)
}
/** Splits a string into substrings with possibly internal whitespace,
* each of them at most `lim` bytes long. The substrings have leading and trailing
* whitespace removed, and are only cut at whitespace boundaries.
@@ -2216,7 +2168,7 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
* ~~~ {.rust}
* let string = "a\nb\nc";
* let mut lines = ~[];
* for each_line(string) |line| { lines.push(line) }
* for string.line_iter().advance |line| { lines.push(line) }
*
* assert!(subslice_offset(string, lines[0]) == 0); // &"a"
* assert!(subslice_offset(string, lines[1]) == 2); // &"b"
@@ -2523,6 +2475,18 @@ pub trait StrSlice<'self> {
fn rev_iter(&self) -> StrCharRevIterator<'self>;
fn bytes_iter(&self) -> StrBytesIterator<'self>;
fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
/// An iterator over the substrings of `self` separated by the characters
/// that `sep` matches.
fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
/// An iterator over the substrings of `self` separated by the characters
/// that `sep` matches, splitting at most `count` times.
fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint)
-> StrCharSplitIterator<'self, Sep>;
/// An iterator over the substrings of `self` separated by the characters
/// that `sep` matches, splitting at most `count` times;
/// `allow_trailing_empty` controls whether an empty string at the end is
/// yielded.
fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep,
count: uint, allow_trailing_empty: bool)
-> StrCharSplitIterator<'self, Sep>;
/// An iterator over the lines of a string (subsequences separated
/// by `\n`). The `&str` implementation in this file does not yield an
/// empty string at the end.
fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
/// An iterator over the words of a string (subsequences separated
/// by any sequence of whitespace). The `&str` implementation in this file
/// filters out empty substrings.
fn word_iter(&self) -> WordIterator<'self>;
fn ends_with(&self, needle: &str) -> bool;
fn is_empty(&self) -> bool;
fn is_whitespace(&self) -> bool;
@@ -2530,8 +2494,6 @@ pub trait StrSlice<'self> {
fn len(&self) -> uint;
fn char_len(&self) -> uint;
fn slice(&self, begin: uint, end: uint) -> &'self str;
/// Splits a string into substrings using a character function, passing
/// each substring to `it`.
fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool;
/// Splits a string into substrings at each occurrence of a given character,
/// passing each substring to `it`.
fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool;
fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool) -> bool;
fn starts_with<'a>(&self, needle: &'a str) -> bool;
fn substr(&self, begin: uint, n: uint) -> &'self str;
@@ -2597,6 +2559,36 @@ impl<'self> StrSlice<'self> for &'self str {
StrBytesRevIterator { it: as_bytes_slice(*self).rev_iter() }
}
/// Splits at every character matched by `sep`; an empty string at the end
/// is allowed.
fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
    // The string's length is handed over as the number of splits allowed.
    let split_limit = self.len();
    self.split_options_iter(sep, split_limit, true)
}
/// Splits at characters matched by `sep`, at most `count` times; an empty
/// string at the end is allowed.
fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint)
    -> StrCharSplitIterator<'self, Sep> {
    let keep_trailing_empty = true;
    self.split_options_iter(sep, count, keep_trailing_empty)
}
/// Builds the split iterator: it starts at offset 0, may split `count`
/// times, and yields an empty string at the end only when
/// `allow_trailing_empty` is set.
fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep,
                                                  count: uint, allow_trailing_empty: bool)
    -> StrCharSplitIterator<'self, Sep> {
    // Ask the separator before it is moved into the iterator below.
    let ascii_fast_path = sep.only_ascii();
    let source: &'self str = *self;
    StrCharSplitIterator {
        string: source,
        position: 0,
        sep: sep,
        count: count,
        allow_trailing_empty: allow_trailing_empty,
        finished: false,
        only_ascii: ascii_fast_path
    }
}
/// Splits on `'\n'`; an empty string at the end is not yielded.
fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
    // The string's length is handed over as the number of splits allowed.
    let split_limit = self.len();
    self.split_options_iter('\n', split_limit, false)
}
/// Splits on whitespace characters and drops the empty pieces.
fn word_iter(&self) -> WordIterator<'self> {
    let pieces = self.split_iter(char::is_whitespace);
    pieces.filter(|s| !s.is_empty())
}
/// Returns true if one string ends with another
#[inline]
@@ -2637,18 +2629,6 @@ impl<'self> StrSlice<'self> for &'self str {
fn slice(&self, begin: uint, end: uint) -> &'self str {
slice(*self, begin, end)
}
/// Splits a string into substrings using a character function
///
/// Method form of the free function `each_split`, to which it forwards.
#[inline]
fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool {
each_split(*self, sepfn, it)
}
/**
* Splits a string into substrings at each occurrence of a given character
*
* Method form of the free function `each_split_char`, to which it forwards.
*/
#[inline]
fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool {
each_split_char(*self, sep, it)
}
/**
* Splits a string into a vector of the substrings separated by a given
* string
@@ -2904,131 +2884,6 @@ mod tests {
let _cc3 = pop_char(&mut data);
}
// Checks `each_split_char` with leading, repeated and trailing separators,
// with empty input and with a separator that does not occur.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_split_char() {
fn t(s: &str, c: char, u: &[~str]) {
debug!("split_byte: %?", s);
let mut v = ~[];
for each_split_char(s, c) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]);
t(".hello.there", '.', [~"", ~"hello", ~"there"]);
t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there", ~""]);
t("", 'z', [~""]);
t("z", 'z', [~"",~""]);
t("ok", 'z', [~"ok"]);
}
// Checks `each_split_char` on a string of multi-byte characters, once with
// an ASCII separator and once with a non-ASCII one.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_split_char_2() {
fn t(s: &str, c: char, u: &[~str]) {
debug!("split_byte: %?", s);
let mut v = ~[];
for each_split_char(s, c) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
let data = "ประเทศไทย中华Việt Nam";
t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]);
t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]);
}
// Checks `each_splitn_char` for several split limits, including 0 (the
// whole string is expected back) and limits larger than the number of
// separators.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_splitn_char() {
fn t(s: &str, c: char, n: uint, u: &[~str]) {
debug!("splitn_byte: %?", s);
let mut v = ~[];
for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
debug!("comparing vs. %?", u);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("abc.hello.there", '.', 0u, [~"abc.hello.there"]);
t("abc.hello.there", '.', 1u, [~"abc", ~"hello.there"]);
t("abc.hello.there", '.', 2u, [~"abc", ~"hello", ~"there"]);
t("abc.hello.there", '.', 3u, [~"abc", ~"hello", ~"there"]);
t(".hello.there", '.', 0u, [~".hello.there"]);
t(".hello.there", '.', 1u, [~"", ~"hello.there"]);
t("...hello.there.", '.', 3u, [~"", ~"", ~"", ~"hello.there."]);
t("...hello.there.", '.', 5u, [~"", ~"", ~"", ~"hello", ~"there", ~""]);
t("", 'z', 5u, [~""]);
t("z", 'z', 5u, [~"",~""]);
t("ok", 'z', 5u, [~"ok"]);
t("z", 'z', 0u, [~"z"]);
t("w.x.y", '.', 0u, [~"w.x.y"]);
t("w.x.y", '.', 1u, [~"w",~"x.y"]);
}
// Checks `each_splitn_char` with a non-ASCII separator and with a limit
// that leaves later separators unsplit.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_splitn_char_2() {
fn t(s: &str, c: char, n: uint, u: &[~str]) {
debug!("splitn_byte: %?", s);
let mut v = ~[];
for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
debug!("comparing vs. %?", u);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("ประเทศไทย中华Việt Nam", '华', 1u, [~"ประเทศไทย中", ~"Việt Nam"]);
t("zzXXXzYYYzWWWz", 'z', 3u, [~"", ~"", ~"XXX", ~"YYYzWWWz"]);
t("z", 'z', 5u, [~"",~""]);
t("", 'z', 5u, [~""]);
t("ok", 'z', 5u, [~"ok"]);
}
// Checks `each_splitn_char` with a limit of one split on a string of
// multi-byte characters, for an ASCII and a non-ASCII separator.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_splitn_char_3() {
fn t(s: &str, c: char, n: uint, u: &[~str]) {
debug!("splitn_byte: %?", s);
let mut v = ~[];
for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
debug!("comparing vs. %?", u);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
let data = "ประเทศไทย中华Việt Nam";
t(data, 'V', 1u, [~"ประเทศไทย中华", ~"iệt Nam"]);
t(data, 'ท', 1u, [~"ประเ", ~"ศไทย中华Việt Nam"]);
}
// Checks `each_split_char_no_trailing`: the expected lists contain no empty
// string at the end, and empty input is expected to produce nothing.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length (e.g. against the empty expected list)
// would go unnoticed — confirm.
#[test]
fn test_split_char_no_trailing() {
fn t(s: &str, c: char, u: &[~str]) {
debug!("split_byte: %?", s);
let mut v = ~[];
for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]);
t(".hello.there", '.', [~"", ~"hello", ~"there"]);
t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]);
t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]);
t("", 'z', []);
t("z", 'z', [~""]);
t("ok", 'z', [~"ok"]);
}
// Checks `each_split_char_no_trailing` on a string of multi-byte
// characters, for an ASCII and a non-ASCII separator.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_split_char_no_trailing_2() {
fn t(s: &str, c: char, u: &[~str]) {
debug!("split_byte: %?", s);
let mut v = ~[];
for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) }
debug!("split_byte to: %?", v);
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
let data = "ประเทศไทย中华Việt Nam";
t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]);
t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]);
}
#[test]
fn test_split_str() {
fn t<'a>(s: &str, sep: &'a str, u: &[~str]) {
@@ -3053,75 +2908,6 @@ mod tests {
}
// Checks `each_split` with closure and named-function separators,
// including consecutive separators and empty input.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length would go unnoticed — confirm.
#[test]
fn test_split() {
fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) {
let mut v = ~[];
for each_split(s, sepf) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]);
t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY", ~""]);
t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]);
t("z", |cc| cc == 'z', [~"",~""]);
t("", |cc| cc == 'z', [~""]);
t("ok", |cc| cc == 'z', [~"ok"]);
}
// Checks `each_split_no_trailing`: same inputs as `test_split`, but the
// expected lists contain no empty string at the end.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length (e.g. against the empty expected list)
// would go unnoticed — confirm.
#[test]
fn test_split_no_trailing() {
fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) {
let mut v = ~[];
for each_split_no_trailing(s, sepf) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]);
t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY"]);
t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]);
t("z", |cc| cc == 'z', [~""]);
t("", |cc| cc == 'z', []);
t("ok", |cc| cc == 'z', [~"ok"]);
}
// Checks `each_line` and `each_line_any` side by side on LF and CR LF
// input: `each_line` is expected to keep the '\r', `each_line_any` to drop
// it.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length (e.g. against the empty expected list)
// would go unnoticed — confirm.
#[test]
fn test_lines() {
let lf = "\nMary had a little lamb\nLittle lamb\n";
let crlf = "\r\nMary had a little lamb\r\nLittle lamb\r\n";
fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) {
let mut v = ~[];
for f(s) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
t(lf, each_line, [~"", ~"Mary had a little lamb", ~"Little lamb"]);
t(lf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]);
t(crlf, each_line, [~"\r", ~"Mary had a little lamb\r", ~"Little lamb\r"]);
t(crlf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]);
t("", each_line, []);
t("", each_line_any, []);
t("\n", each_line, [~""]);
t("\n", each_line_any, [~""]);
t("banana", each_line, [~"banana"]);
t("banana", each_line_any, [~"banana"]);
}
// Checks `each_word` on text containing spaces and newlines, on a single
// word and on empty input.
// NOTE(review): the `zip` comparison presumably stops at the shorter of `v`
// and `u`, so a difference in length (e.g. against the empty expected list)
// would go unnoticed — confirm.
#[test]
fn test_words() {
fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) {
let mut v = ~[];
for f(s) |s| { v.push(s.to_owned()) }
assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
}
let data = "\nMary had a little lamb\nLittle lamb\n";
t(data, each_word, [~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]);
t("ok", each_word, [~"ok"]);
t("", each_word, []);
}
#[test]
fn test_split_within() {
fn t(s: &str, i: uint, u: &[~str]) {
@@ -3671,7 +3457,7 @@ mod tests {
let string = "a\nb\nc";
let mut lines = ~[];
for each_line(string) |line| { lines.push(line) }
for string.line_iter().advance |line| { lines.push(line) }
assert_eq!(subslice_offset(string, lines[0]), 0);
assert_eq!(subslice_offset(string, lines[1]), 2);
assert_eq!(subslice_offset(string, lines[2]), 4);
@@ -3730,78 +3516,6 @@ mod tests {
assert!(!contains_char("", 'a'));
}
// Checks the first four pieces produced by `each_split_char` when splitting
// on ' '. Later pieces fall into the `_` arm and are not checked, and the
// total number of pieces is not asserted.
#[test]
fn test_split_char_each() {
let data = "\nMary had a little lamb\nLittle lamb\n";
let mut ii = 0;
for each_split_char(data, ' ') |xx| {
match ii {
0 => assert!("\nMary" == xx),
1 => assert!("had" == xx),
2 => assert!("a" == xx),
3 => assert!("little" == xx),
_ => ()
}
ii += 1;
}
}
// Checks `each_splitn_char` with a limit of two splits on ' ': the third
// piece is expected to be the unsplit remainder. The total number of pieces
// is not asserted.
#[test]
fn test_splitn_char_each() {
let data = "\nMary had a little lamb\nLittle lamb\n";
let mut ii = 0;
for each_splitn_char(data, ' ', 2u) |xx| {
match ii {
0 => assert!("\nMary" == xx),
1 => assert!("had" == xx),
2 => assert!("a little lamb\nLittle lamb\n" == xx),
_ => ()
}
ii += 1;
}
}
// Checks the first four words produced by `each_word`; later words fall
// into the `_` arm and are not checked. Also checks that the callback is
// never invoked for an empty string.
#[test]
fn test_words_each() {
let data = "\nMary had a little lamb\nLittle lamb\n";
let mut ii = 0;
for each_word(data) |ww| {
match ii {
0 => assert!("Mary" == ww),
1 => assert!("had" == ww),
2 => assert!("a" == ww),
3 => assert!("little" == ww),
_ => ()
}
ii += 1;
}
each_word("", |_x| fail!()); // should not fail
}
// Checks the first three lines produced by `each_line` for LF-separated
// input that starts and ends with '\n'. The total number of lines is not
// asserted.
#[test]
fn test_lines_each () {
let lf = "\nMary had a little lamb\nLittle lamb\n";
let mut ii = 0;
for each_line(lf) |x| {
match ii {
0 => assert!("" == x),
1 => assert!("Mary had a little lamb" == x),
2 => assert!("Little lamb" == x),
_ => ()
}
ii += 1;
}
}
#[test]
fn test_map() {
assert_eq!(~"", map("", |c| unsafe {libc::toupper(c as c_char)} as char));
@@ -4015,4 +3729,68 @@ mod tests {
assert_eq!(b, v[pos]);
}
}
// Checks `split_iter` with a `char` separator and with an equivalent
// closure separator, first for ASCII ' ' and then for non-ASCII 'ä'; the
// collected pieces are compared against the full expected vector.
#[test]
fn test_split_char_iterator() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
let split: ~[&str] = data.split_iter(' ').collect();
assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Unicode
let split: ~[&str] = data.split_iter('ä').collect();
assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
}
// Checks `splitn_iter` with a limit of three splits: the fourth piece is
// expected to be the unsplit remainder. Covers `char` and closure
// separators, ASCII and non-ASCII.
#[test]
fn test_splitn_char_iterator() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
let split: ~[&str] = data.splitn_iter(' ', 3).collect();
assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Unicode
let split: ~[&str] = data.splitn_iter('ä', 3).collect();
assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
}
// Checks the `allow_trailing_empty` flag of `split_options_iter` on input
// ending with the separator: with `true` an empty string at the end is
// expected, with `false` it is not.
#[test]
fn test_split_char_iterator_no_trailing() {
let data = "\nMäry häd ä little lämb\nLittle lämb\n";
let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
}
// Checks `word_iter` on text with leading, trailing and consecutive
// whitespace (newlines, spaces and tabs): only the non-empty words are
// expected.
#[test]
fn test_word_iter() {
let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
let words: ~[&str] = data.word_iter().collect();
assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}
// Checks `line_iter` on input with an empty first line and an empty line in
// the middle; the same lines are expected with and without a final '\n'.
#[test]
fn test_line_iter() {
let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
let lines: ~[&str] = data.line_iter().collect();
assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
let lines: ~[&str] = data.line_iter().collect();
assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
}
}