std: replace str::each_split* with an iterator

2013-06-09 23:10:50 +10:00
parent 2ff6b298c5
commit 1e8982bdb2
19 changed files with 270 additions and 516 deletions
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -25,7 +25,7 @@ use clone::Clone;
 use cmp::{TotalOrd, Ordering, Less, Equal, Greater};
 use container::Container;
 use iter::Times;
-use iterator::{Iterator, IteratorUtil};
+use iterator::{Iterator, IteratorUtil, FilterIterator};
 use libc;
 use option::{None, Option, Some};
 use old_iter::{BaseIter, EqIter};
@@ -633,128 +633,92 @@ pub fn slice<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
    unsafe { raw::slice_bytes(s, begin, end) }
 }

-/// Splits a string into substrings at each occurrence of a given character
-pub fn each_split_char<'a>(s: &'a str, sep: char,
-                           it: &fn(&'a str) -> bool) -> bool {
-    each_split_char_inner(s, sep, len(s), true, true, it)
+/// An iterator over the substrings of a string, separated by `sep`.
+pub struct StrCharSplitIterator<'self,Sep> {
+    priv string: &'self str,
+    priv position: uint,
+    priv sep: Sep,
+    /// The number of splits remaining
+    priv count: uint,
+    /// Whether an empty string at the end is allowed
+    priv allow_trailing_empty: bool,
+    priv finished: bool,
+    priv only_ascii: bool
 }

-/// Like `each_split_char`, but a trailing empty string is omitted
-pub fn each_split_char_no_trailing<'a>(s: &'a str,
-                                       sep: char,
-                                       it: &fn(&'a str) -> bool) -> bool {
-    each_split_char_inner(s, sep, len(s), true, false, it)
+/// An iterator over the words of a string, separated by an sequence of whitespace
+pub type WordIterator<'self> =
+    FilterIterator<'self, &'self str,
+             StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
+
+/// A separator for splitting a string character-wise
+pub trait StrCharSplitSeparator {
+    /// Determine if the splitter should split at the given character
+    fn should_split(&self, char) -> bool;
+    /// Indicate if the splitter only uses ASCII characters, which
+    /// allows for a faster implementation.
+    fn only_ascii(&self) -> bool;
+}
+impl StrCharSplitSeparator for char {
+    #[inline(always)]
+    fn should_split(&self, c: char) -> bool { *self == c }
+
+    fn only_ascii(&self) -> bool { (*self as uint) < 128 }
+}
+impl<'self> StrCharSplitSeparator for &'self fn(char) -> bool {
+    #[inline(always)]
+    fn should_split(&self, c: char) -> bool { (*self)(c) }
+
+    fn only_ascii(&self) -> bool { false }
+}
+impl<'self> StrCharSplitSeparator for extern "Rust" fn(char) -> bool {
+    #[inline(always)]
+    fn should_split(&self, c: char) -> bool { (*self)(c) }
+
+    fn only_ascii(&self) -> bool { false }
 }

-/**
- * Splits a string into substrings at each occurrence of a given
- * character up to 'count' times.
- *
- * The character must be a valid UTF-8/ASCII character
- */
-pub fn each_splitn_char<'a>(s: &'a str,
-                            sep: char,
-                            count: uint,
-                            it: &fn(&'a str) -> bool) -> bool {
-    each_split_char_inner(s, sep, count, true, true, it)
-}
+impl<'self, Sep: StrCharSplitSeparator> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
+    fn next(&mut self) -> Option<&'self str> {
+        if self.finished { return None }

-/// Like `each_split_char`, but omits empty strings
-pub fn each_split_char_nonempty<'a>(s: &'a str,
-                                    sep: char,
-                                    it: &fn(&'a str) -> bool) -> bool {
-    each_split_char_inner(s, sep, len(s), false, false, it)
-}
+        let l = self.string.len();
+        let start = self.position;

-fn each_split_char_inner<'a>(s: &'a str,
-                             sep: char,
-                             count: uint,
-                             allow_empty: bool,
-                             allow_trailing_empty: bool,
-                             it: &fn(&'a str) -> bool) -> bool {
-    if sep < 128u as char {
-        let (b, l) = (sep as u8, len(s));
-        let mut done = 0u;
-        let mut (i, start) = (0u, 0u);
-        while i < l && done < count {
-            if s[i] == b {
-                if allow_empty || start < i {
-                    if !it( unsafe{ raw::slice_bytes(s, start, i) } ) {
-                        return false;
-                    }
+        if self.only_ascii {
+            // this gives a *huge* speed up for splitting on ASCII
+            // characters (e.g. '\n' or ' ')
+            while self.position < l && self.count > 0 {
+                let byte = self.string[self.position];
+
+                if self.sep.should_split(byte as char) {
+                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
+                    self.position += 1;
+                    self.count -= 1;
+                    return Some(slice);
                }
-                start = i + 1u;
-                done += 1u;
+                self.position += 1;
            }
-            i += 1u;
-        }
-        // only slice a non-empty trailing substring
-        if allow_trailing_empty || start < l {
-            if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; }
-        }
-        return true;
-    }
-    return each_split_inner(s, |cur| cur == sep, count,
-                            allow_empty, allow_trailing_empty, it)
-}
+        } else {
+            while self.position < l && self.count > 0 {
+                let CharRange {ch, next} = char_range_at(self.string, self.position);

-/// Splits a string into substrings using a character function
-pub fn each_split<'a>(s: &'a str,
-                      sepfn: &fn(char) -> bool,
-                      it: &fn(&'a str) -> bool) -> bool {
-    each_split_inner(s, sepfn, len(s), true, true, it)
-}
-
-/// Like `each_split`, but a trailing empty string is omitted
-pub fn each_split_no_trailing<'a>(s: &'a str,
-                                  sepfn: &fn(char) -> bool,
-                                  it: &fn(&'a str) -> bool) -> bool {
-    each_split_inner(s, sepfn, len(s), true, false, it)
-}
-
-/**
- * Splits a string into substrings using a character function, cutting at
- * most `count` times.
- */
-pub fn each_splitn<'a>(s: &'a str,
-                       sepfn: &fn(char) -> bool,
-                       count: uint,
-                       it: &fn(&'a str) -> bool) -> bool {
-    each_split_inner(s, sepfn, count, true, true, it)
-}
-
-/// Like `each_split`, but omits empty strings
-pub fn each_split_nonempty<'a>(s: &'a str,
-                               sepfn: &fn(char) -> bool,
-                               it: &fn(&'a str) -> bool) -> bool {
-    each_split_inner(s, sepfn, len(s), false, false, it)
-}
-
-fn each_split_inner<'a>(s: &'a str,
-                        sepfn: &fn(cc: char) -> bool,
-                        count: uint,
-                        allow_empty: bool,
-                        allow_trailing_empty: bool,
-                        it: &fn(&'a str) -> bool) -> bool {
-    let l = len(s);
-    let mut (i, start, done) = (0u, 0u, 0u);
-    while i < l && done < count {
-        let CharRange {ch, next} = char_range_at(s, i);
-        if sepfn(ch) {
-            if allow_empty || start < i {
-                if !it( unsafe{ raw::slice_bytes(s, start, i) } ) {
-                    return false;
+                if self.sep.should_split(ch) {
+                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
+                    self.position = next;
+                    self.count -= 1;
+                    return Some(slice);
                }
+                self.position = next;
            }
-            start = next;
-            done += 1u;
        }
-        i = next;
+        self.finished = true;
+        if self.allow_trailing_empty || start < l {
+            Some(unsafe { raw::slice_bytes(self.string, start, l) })
+        } else {
+            None
+        }
    }
-    if allow_trailing_empty || start < l {
-        if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return false; }
-    }
-    return true;
 }

 // See Issue #1932 for why this is a naive search
@@ -875,19 +839,12 @@ pub fn levdistance(s: &str, t: &str) -> uint {
    return dcol[tlen];
 }

-/**
- * Splits a string into substrings separated by LF ('\n').
- */
-pub fn each_line<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
-    each_split_char_no_trailing(s, '\n', it)
-}
-
 /**
 * Splits a string into substrings separated by LF ('\n')
 * and/or CR LF ("\r\n")
 */
 pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
-    for each_line(s) |s| {
+    for s.line_iter().advance |s| {
        let l = s.len();
        if l > 0u && s[l - 1u] == '\r' as u8 {
            if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return false; }
@@ -898,11 +855,6 @@ pub fn each_line_any<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
    return true;
 }

-/// Splits a string into substrings separated by whitespace
-pub fn each_word<'a>(s: &'a str, it: &fn(&'a str) -> bool) -> bool {
-    each_split_nonempty(s, char::is_whitespace, it)
-}
-
 /** Splits a string into substrings with possibly internal whitespace,
 *  each of them at most `lim` bytes long. The substrings have leading and trailing
 *  whitespace removed, and are only cut at whitespace boundaries.
@@ -2216,7 +2168,7 @@ pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
 * ~~~ {.rust}
 * let string = "a\nb\nc";
 * let mut lines = ~[];
- * for each_line(string) |line| { lines.push(line) }
+ * for string.line_iter().advance |line| { lines.push(line) }
 *
 * assert!(subslice_offset(string, lines[0]) == 0); // &"a"
 * assert!(subslice_offset(string, lines[1]) == 2); // &"b"
@@ -2523,6 +2475,18 @@ pub trait StrSlice<'self> {
    fn rev_iter(&self) -> StrCharRevIterator<'self>;
    fn bytes_iter(&self) -> StrBytesIterator<'self>;
    fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
+    fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
+    fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint)
+        -> StrCharSplitIterator<'self, Sep>;
+    fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep,
+                                                      count: uint, allow_trailing_empty: bool)
+        -> StrCharSplitIterator<'self, Sep>;
+    /// An iterator over the lines of a string (subsequences separated
+    /// by `\n`).
+    fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
+    /// An iterator over the words of a string (subsequences separated
+    /// by any sequence of whitespace).
+    fn word_iter(&self) -> WordIterator<'self>;
    fn ends_with(&self, needle: &str) -> bool;
    fn is_empty(&self) -> bool;
    fn is_whitespace(&self) -> bool;
@@ -2530,8 +2494,6 @@ pub trait StrSlice<'self> {
    fn len(&self) -> uint;
    fn char_len(&self) -> uint;
    fn slice(&self, begin: uint, end: uint) -> &'self str;
-    fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool;
-    fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool;
    fn each_split_str<'a>(&self, sep: &'a str, it: &fn(&'self str) -> bool) -> bool;
    fn starts_with<'a>(&self, needle: &'a str) -> bool;
    fn substr(&self, begin: uint, n: uint) -> &'self str;
@@ -2597,6 +2559,36 @@ impl<'self> StrSlice<'self> for &'self str {
        StrBytesRevIterator { it: as_bytes_slice(*self).rev_iter() }
    }

+    fn split_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
+        self.split_options_iter(sep, self.len(), true)
+    }
+
+    fn splitn_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep, count: uint)
+        -> StrCharSplitIterator<'self, Sep> {
+        self.split_options_iter(sep, count, true)
+    }
+    fn split_options_iter<Sep: StrCharSplitSeparator>(&self, sep: Sep,
+                                                      count: uint, allow_trailing_empty: bool)
+        -> StrCharSplitIterator<'self, Sep> {
+        let only_ascii = sep.only_ascii();
+        StrCharSplitIterator {
+            string: *self,
+            position: 0,
+            sep: sep,
+            count: count,
+            allow_trailing_empty: allow_trailing_empty,
+            finished: false,
+            only_ascii: only_ascii
+        }
+    }
+
+    fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
+        self.split_options_iter('\n', self.len(), false)
+    }
+    fn word_iter(&self) -> WordIterator<'self> {
+        self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
+    }
+

    /// Returns true if one string ends with another
    #[inline]
@@ -2637,18 +2629,6 @@ impl<'self> StrSlice<'self> for &'self str {
    fn slice(&self, begin: uint, end: uint) -> &'self str {
        slice(*self, begin, end)
    }
-    /// Splits a string into substrings using a character function
-    #[inline]
-    fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) -> bool {
-        each_split(*self, sepfn, it)
-    }
-    /**
-     * Splits a string into substrings at each occurrence of a given character
-     */
-    #[inline]
-    fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) -> bool {
-        each_split_char(*self, sep, it)
-    }
    /**
     * Splits a string into a vector of the substrings separated by a given
     * string
@@ -2904,131 +2884,6 @@ mod tests {
        let _cc3 = pop_char(&mut data);
    }

-    #[test]
-    fn test_split_char() {
-        fn t(s: &str, c: char, u: &[~str]) {
-            debug!("split_byte: %?", s);
-            let mut v = ~[];
-            for each_split_char(s, c) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]);
-        t(".hello.there", '.', [~"", ~"hello", ~"there"]);
-        t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there", ~""]);
-
-        t("", 'z', [~""]);
-        t("z", 'z', [~"",~""]);
-        t("ok", 'z', [~"ok"]);
-    }
-
-    #[test]
-    fn test_split_char_2() {
-        fn t(s: &str, c: char, u: &[~str]) {
-            debug!("split_byte: %?", s);
-            let mut v = ~[];
-            for each_split_char(s, c) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        let data = "ประเทศไทย中华Việt Nam";
-        t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]);
-        t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]);
-    }
-
-    #[test]
-    fn test_splitn_char() {
-        fn t(s: &str, c: char, n: uint, u: &[~str]) {
-            debug!("splitn_byte: %?", s);
-            let mut v = ~[];
-            for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            debug!("comparing vs. %?", u);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        t("abc.hello.there", '.', 0u, [~"abc.hello.there"]);
-        t("abc.hello.there", '.', 1u, [~"abc", ~"hello.there"]);
-        t("abc.hello.there", '.', 2u, [~"abc", ~"hello", ~"there"]);
-        t("abc.hello.there", '.', 3u, [~"abc", ~"hello", ~"there"]);
-        t(".hello.there", '.', 0u, [~".hello.there"]);
-        t(".hello.there", '.', 1u, [~"", ~"hello.there"]);
-        t("...hello.there.", '.', 3u, [~"", ~"", ~"", ~"hello.there."]);
-        t("...hello.there.", '.', 5u, [~"", ~"", ~"", ~"hello", ~"there", ~""]);
-
-        t("", 'z', 5u, [~""]);
-        t("z", 'z', 5u, [~"",~""]);
-        t("ok", 'z', 5u, [~"ok"]);
-        t("z", 'z', 0u, [~"z"]);
-        t("w.x.y", '.', 0u, [~"w.x.y"]);
-        t("w.x.y", '.', 1u, [~"w",~"x.y"]);
-    }
-
-    #[test]
-    fn test_splitn_char_2() {
-        fn t(s: &str, c: char, n: uint, u: &[~str]) {
-            debug!("splitn_byte: %?", s);
-            let mut v = ~[];
-            for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            debug!("comparing vs. %?", u);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-
-        t("ประเทศไทย中华Việt Nam", '华', 1u, [~"ประเทศไทย中", ~"Việt Nam"]);
-        t("zzXXXzYYYzWWWz", 'z', 3u, [~"", ~"", ~"XXX", ~"YYYzWWWz"]);
-        t("z", 'z', 5u, [~"",~""]);
-        t("", 'z', 5u, [~""]);
-        t("ok", 'z', 5u, [~"ok"]);
-    }
-
-    #[test]
-    fn test_splitn_char_3() {
-        fn t(s: &str, c: char, n: uint, u: &[~str]) {
-            debug!("splitn_byte: %?", s);
-            let mut v = ~[];
-            for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            debug!("comparing vs. %?", u);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        let data = "ประเทศไทย中华Việt Nam";
-        t(data, 'V', 1u, [~"ประเทศไทย中华", ~"iệt Nam"]);
-        t(data, 'ท', 1u, [~"ประเ", ~"ศไทย中华Việt Nam"]);
-    }
-
-    #[test]
-    fn test_split_char_no_trailing() {
-        fn t(s: &str, c: char, u: &[~str]) {
-            debug!("split_byte: %?", s);
-            let mut v = ~[];
-            for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        t("abc.hello.there", '.', [~"abc", ~"hello", ~"there"]);
-        t(".hello.there", '.', [~"", ~"hello", ~"there"]);
-        t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]);
-
-        t("...hello.there.", '.', [~"", ~"", ~"", ~"hello", ~"there"]);
-        t("", 'z', []);
-        t("z", 'z', [~""]);
-        t("ok", 'z', [~"ok"]);
-    }
-
-    #[test]
-    fn test_split_char_no_trailing_2() {
-        fn t(s: &str, c: char, u: &[~str]) {
-            debug!("split_byte: %?", s);
-            let mut v = ~[];
-            for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) }
-            debug!("split_byte to: %?", v);
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        let data = "ประเทศไทย中华Việt Nam";
-        t(data, 'V', [~"ประเทศไทย中华", ~"iệt Nam"]);
-        t(data, 'ท', [~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]);
-    }
-
    #[test]
    fn test_split_str() {
        fn t<'a>(s: &str, sep: &'a str, u: &[~str]) {
@@ -3053,75 +2908,6 @@ mod tests {
    }


-    #[test]
-    fn test_split() {
-        fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) {
-            let mut v = ~[];
-            for each_split(s, sepf) |s| { v.push(s.to_owned()) }
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-
-        t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]);
-        t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY", ~""]);
-        t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]);
-        t("z", |cc| cc == 'z', [~"",~""]);
-        t("", |cc| cc == 'z', [~""]);
-        t("ok", |cc| cc == 'z', [~"ok"]);
-    }
-
-    #[test]
-    fn test_split_no_trailing() {
-        fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) {
-            let mut v = ~[];
-            for each_split_no_trailing(s, sepf) |s| { v.push(s.to_owned()) }
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-
-        t("ประเทศไทย中华Việt Nam", |cc| cc == '华', [~"ประเทศไทย中", ~"Việt Nam"]);
-        t("zzXXXzYYYz", char::is_lowercase, [~"", ~"", ~"XXX", ~"YYY"]);
-        t("zzXXXzYYYz", char::is_uppercase, [~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]);
-        t("z", |cc| cc == 'z', [~""]);
-        t("", |cc| cc == 'z', []);
-        t("ok", |cc| cc == 'z', [~"ok"]);
-    }
-
-    #[test]
-    fn test_lines() {
-        let lf = "\nMary had a little lamb\nLittle lamb\n";
-        let crlf = "\r\nMary had a little lamb\r\nLittle lamb\r\n";
-
-        fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) {
-            let mut v = ~[];
-            for f(s) |s| { v.push(s.to_owned()) }
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-
-        t(lf, each_line, [~"", ~"Mary had a little lamb", ~"Little lamb"]);
-        t(lf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]);
-        t(crlf, each_line, [~"\r", ~"Mary had a little lamb\r", ~"Little lamb\r"]);
-        t(crlf, each_line_any, [~"", ~"Mary had a little lamb", ~"Little lamb"]);
-        t("", each_line, []);
-        t("", each_line_any, []);
-        t("\n", each_line, [~""]);
-        t("\n", each_line_any, [~""]);
-        t("banana", each_line, [~"banana"]);
-        t("banana", each_line_any, [~"banana"]);
-    }
-
-    #[test]
-    fn test_words() {
-        fn t(s: &str, f: &fn(&str, &fn(&str) -> bool) -> bool, u: &[~str]) {
-            let mut v = ~[];
-            for f(s) |s| { v.push(s.to_owned()) }
-            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
-        }
-        let data = "\nMary had a little lamb\nLittle lamb\n";
-
-        t(data, each_word, [~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]);
-        t("ok", each_word, [~"ok"]);
-        t("", each_word, []);
-    }
-
    #[test]
    fn test_split_within() {
        fn t(s: &str, i: uint, u: &[~str]) {
@@ -3671,7 +3457,7 @@ mod tests {

        let string = "a\nb\nc";
        let mut lines = ~[];
-        for each_line(string) |line| { lines.push(line) }
+        for string.line_iter().advance |line| { lines.push(line) }
        assert_eq!(subslice_offset(string, lines[0]), 0);
        assert_eq!(subslice_offset(string, lines[1]), 2);
        assert_eq!(subslice_offset(string, lines[2]), 4);
@@ -3730,78 +3516,6 @@ mod tests {
        assert!(!contains_char("", 'a'));
    }

-    #[test]
-    fn test_split_char_each() {
-        let data = "\nMary had a little lamb\nLittle lamb\n";
-
-        let mut ii = 0;
-
-        for each_split_char(data, ' ') |xx| {
-            match ii {
-              0 => assert!("\nMary" == xx),
-              1 => assert!("had"    == xx),
-              2 => assert!("a"      == xx),
-              3 => assert!("little" == xx),
-              _ => ()
-            }
-            ii += 1;
-        }
-    }
-
-    #[test]
-    fn test_splitn_char_each() {
-        let data = "\nMary had a little lamb\nLittle lamb\n";
-
-        let mut ii = 0;
-
-        for each_splitn_char(data, ' ', 2u) |xx| {
-            match ii {
-              0 => assert!("\nMary" == xx),
-              1 => assert!("had"    == xx),
-              2 => assert!("a little lamb\nLittle lamb\n" == xx),
-              _ => ()
-            }
-            ii += 1;
-        }
-    }
-
-    #[test]
-    fn test_words_each() {
-        let data = "\nMary had a little lamb\nLittle lamb\n";
-
-        let mut ii = 0;
-
-        for each_word(data) |ww| {
-            match ii {
-              0 => assert!("Mary"   == ww),
-              1 => assert!("had"    == ww),
-              2 => assert!("a"      == ww),
-              3 => assert!("little" == ww),
-              _ => ()
-            }
-            ii += 1;
-        }
-
-        each_word("", |_x| fail!()); // should not fail
-    }
-
-    #[test]
-    fn test_lines_each () {
-        let lf = "\nMary had a little lamb\nLittle lamb\n";
-
-        let mut ii = 0;
-
-        for each_line(lf) |x| {
-            match ii {
-                0 => assert!("" == x),
-                1 => assert!("Mary had a little lamb" == x),
-                2 => assert!("Little lamb" == x),
-                _ => ()
-            }
-            ii += 1;
-        }
-    }
-
    #[test]
    fn test_map() {
        assert_eq!(~"", map("", |c| unsafe {libc::toupper(c as c_char)} as char));
@@ -4015,4 +3729,68 @@ mod tests {
            assert_eq!(b, v[pos]);
        }
    }
+
+    #[test]
+    fn test_split_char_iterator() {
+        let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+        let split: ~[&str] = data.split_iter(' ').collect();
+        assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
+
+        let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
+        assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
+
+        // Unicode
+        let split: ~[&str] = data.split_iter('ä').collect();
+        assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
+
+        let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
+        assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
+    }
+    #[test]
+    fn test_splitn_char_iterator() {
+        let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+        let split: ~[&str] = data.splitn_iter(' ', 3).collect();
+        assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
+
+        let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
+        assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
+
+        // Unicode
+        let split: ~[&str] = data.splitn_iter('ä', 3).collect();
+        assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
+
+        let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
+        assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
+    }
+
+    #[test]
+    fn test_split_char_iterator_no_trailing() {
+        let data = "\nMäry häd ä little lämb\nLittle lämb\n";
+
+        let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
+        assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
+
+        let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
+        assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
+    }
+
+    #[test]
+    fn test_word_iter() {
+        let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
+        let words: ~[&str] = data.word_iter().collect();
+        assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
+    }
+
+    #[test]
+    fn test_line_iter() {
+        let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
+        let lines: ~[&str] = data.line_iter().collect();
+        assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
+
+        let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
+        let lines: ~[&str] = data.line_iter().collect();
+        assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
+    }
 }