std::str: Use iterators instead of while loops for CharSplitIterator

Embed an iterator in the CharSplitIterator struct, and combine that with the former bool `only_ascii`; so use an enum instead.
2013-08-19 15:34:48 +02:00
parent 30ab96b272
commit 8fe8302887
1 changed files with 44 additions and 32 deletions
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -23,7 +23,7 @@ use clone::Clone;
 use container::{Container, Mutable};
 use iter::Times;
 use iterator::{Iterator, FromIterator, Extendable};
-use iterator::{Filter, AdditiveIterator, Map};
+use iterator::{Filter, AdditiveIterator, Map, Enumerate};
 use iterator::{Invert, DoubleEndedIterator};
 use libc;
 use num::{Saturating, Zero};
@@ -359,9 +359,18 @@ pub type ByteIterator<'self> =
 /// Use with the `std::iterator` module.
 pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
 /// An iterator over byte index and either &u8 or char
 #[deriving(Clone)]
 enum OffsetIterator<'self> {
    // use ByteIterator here when it can be cloned
    ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
    CharOffset(CharOffsetIterator<'self>),
 }
 /// An iterator over the substrings of a string, separated by `sep`.
 #[deriving(Clone)]
 pub struct CharSplitIterator<'self,Sep> {
    priv iter: OffsetIterator<'self>,
    priv string: &'self str,
    priv position: uint,
    priv sep: Sep,
@@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
    /// Whether an empty string at the end is allowed
    priv allow_trailing_empty: bool,
    priv finished: bool,
    priv only_ascii: bool
 }
 /// An iterator over the words of a string, separated by an sequence of whitespace
@@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
    fn next(&mut self) -> Option<&'self str> {
        if self.finished { return None }
        let l = self.string.len();
        let start = self.position;
        let len = self.string.len();
-        if self.only_ascii {
+        if self.count > 0 {
            match self.iter {
                // this gives a *huge* speed up for splitting on ASCII
                // characters (e.g. '\n' or ' ')
-            while self.position < l && self.count > 0 {
+                ByteOffset(ref mut iter) =>
-                let byte = self.string[self.position];
+                    for (idx, &byte) in *iter {
                        if self.sep.matches(byte as char) {
-                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
+                            self.position = idx + 1;
                    self.position += 1;
                            self.count -= 1;
-                    return Some(slice);
+                            return Some(unsafe {
                                raw::slice_bytes(self.string, start, idx)
                            })
                        }
-                self.position += 1;
+                    },
-            }
+                CharOffset(ref mut iter) =>
-        } else {
+                    for (idx, ch) in *iter {
            while self.position < l && self.count > 0 {
                let CharRange {ch, next} = self.string.char_range_at(self.position);
                        if self.sep.matches(ch) {
-                    let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
+                            // skip over the separator
-                    self.position = next;
+                            self.position = self.string.char_range_at(idx).next;
                            self.count -= 1;
-                    return Some(slice);
+                            return Some(unsafe {
                                raw::slice_bytes(self.string, start, idx)
                            })
                        }
-                self.position = next;
+                    },
            }
        }
        self.finished = true;
-        if self.allow_trailing_empty || start < l {
+        if self.allow_trailing_empty || start < len {
-            Some(unsafe { raw::slice_bytes(self.string, start, l) })
+            Some(unsafe { raw::slice_bytes(self.string, start, len) })
        } else {
            None
        }
@@ -1327,15 +1335,19 @@ impl<'self> StrSlice<'self> for &'self str {
    #[inline]
    fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
        -> CharSplitIterator<'self, Sep> {
-        let only_ascii = sep.only_ascii();
+        let iter = if sep.only_ascii() {
            ByteOffset(self.as_bytes().iter().enumerate())
        } else {
            CharOffset(self.char_offset_iter())
        };
        CharSplitIterator {
            iter: iter,
            string: *self,
            position: 0,
            sep: sep,
            count: count,
            allow_trailing_empty: allow_trailing_empty,
            finished: false,
            only_ascii: only_ascii
        }
    }