std::str: Use iterators instead of while loops for CharSplitIterator
Embed an iterator in the CharSplitIterator struct. The iterator is an enum, which also replaces the former bool `only_ascii`: its variant selects byte-wise or char-wise iteration.
This commit is contained in:
@@ -23,7 +23,7 @@ use clone::Clone;
|
|||||||
use container::{Container, Mutable};
|
use container::{Container, Mutable};
|
||||||
use iter::Times;
|
use iter::Times;
|
||||||
use iterator::{Iterator, FromIterator, Extendable};
|
use iterator::{Iterator, FromIterator, Extendable};
|
||||||
use iterator::{Filter, AdditiveIterator, Map};
|
use iterator::{Filter, AdditiveIterator, Map, Enumerate};
|
||||||
use iterator::{Invert, DoubleEndedIterator};
|
use iterator::{Invert, DoubleEndedIterator};
|
||||||
use libc;
|
use libc;
|
||||||
use num::{Saturating, Zero};
|
use num::{Saturating, Zero};
|
||||||
@@ -359,9 +359,18 @@ pub type ByteIterator<'self> =
|
|||||||
/// Use with the `std::iterator` module.
|
/// Use with the `std::iterator` module.
|
||||||
pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
|
pub type ByteRevIterator<'self> = Invert<ByteIterator<'self>>;
|
||||||
|
|
||||||
|
/// An iterator over pairs of a byte index and either a `&u8` or a `char`
|
||||||
|
#[deriving(Clone)]
|
||||||
|
enum OffsetIterator<'self> {
|
||||||
|
// use ByteIterator here when it can be cloned
|
||||||
|
ByteOffset(Enumerate<vec::VecIterator<'self, u8>>),
|
||||||
|
CharOffset(CharOffsetIterator<'self>),
|
||||||
|
}
|
||||||
|
|
||||||
/// An iterator over the substrings of a string, separated by `sep`.
|
/// An iterator over the substrings of a string, separated by `sep`.
|
||||||
#[deriving(Clone)]
|
#[deriving(Clone)]
|
||||||
pub struct CharSplitIterator<'self,Sep> {
|
pub struct CharSplitIterator<'self,Sep> {
|
||||||
|
priv iter: OffsetIterator<'self>,
|
||||||
priv string: &'self str,
|
priv string: &'self str,
|
||||||
priv position: uint,
|
priv position: uint,
|
||||||
priv sep: Sep,
|
priv sep: Sep,
|
||||||
@@ -370,7 +379,6 @@ pub struct CharSplitIterator<'self,Sep> {
|
|||||||
/// Whether an empty string at the end is allowed
|
/// Whether an empty string at the end is allowed
|
||||||
priv allow_trailing_empty: bool,
|
priv allow_trailing_empty: bool,
|
||||||
priv finished: bool,
|
priv finished: bool,
|
||||||
priv only_ascii: bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An iterator over the words of a string, separated by a sequence of whitespace
|
/// An iterator over the words of a string, separated by a sequence of whitespace
|
||||||
@@ -386,39 +394,39 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
|
|||||||
fn next(&mut self) -> Option<&'self str> {
|
fn next(&mut self) -> Option<&'self str> {
|
||||||
if self.finished { return None }
|
if self.finished { return None }
|
||||||
|
|
||||||
let l = self.string.len();
|
|
||||||
let start = self.position;
|
let start = self.position;
|
||||||
|
let len = self.string.len();
|
||||||
|
|
||||||
if self.only_ascii {
|
if self.count > 0 {
|
||||||
|
match self.iter {
|
||||||
// this gives a *huge* speed up for splitting on ASCII
|
// this gives a *huge* speed up for splitting on ASCII
|
||||||
// characters (e.g. '\n' or ' ')
|
// characters (e.g. '\n' or ' ')
|
||||||
while self.position < l && self.count > 0 {
|
ByteOffset(ref mut iter) =>
|
||||||
let byte = self.string[self.position];
|
for (idx, &byte) in *iter {
|
||||||
|
|
||||||
if self.sep.matches(byte as char) {
|
if self.sep.matches(byte as char) {
|
||||||
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
|
self.position = idx + 1;
|
||||||
self.position += 1;
|
|
||||||
self.count -= 1;
|
self.count -= 1;
|
||||||
return Some(slice);
|
return Some(unsafe {
|
||||||
|
raw::slice_bytes(self.string, start, idx)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
self.position += 1;
|
},
|
||||||
}
|
CharOffset(ref mut iter) =>
|
||||||
} else {
|
for (idx, ch) in *iter {
|
||||||
while self.position < l && self.count > 0 {
|
|
||||||
let CharRange {ch, next} = self.string.char_range_at(self.position);
|
|
||||||
|
|
||||||
if self.sep.matches(ch) {
|
if self.sep.matches(ch) {
|
||||||
let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
|
// skip over the separator
|
||||||
self.position = next;
|
self.position = self.string.char_range_at(idx).next;
|
||||||
self.count -= 1;
|
self.count -= 1;
|
||||||
return Some(slice);
|
return Some(unsafe {
|
||||||
|
raw::slice_bytes(self.string, start, idx)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
self.position = next;
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.finished = true;
|
self.finished = true;
|
||||||
if self.allow_trailing_empty || start < l {
|
if self.allow_trailing_empty || start < len {
|
||||||
Some(unsafe { raw::slice_bytes(self.string, start, l) })
|
Some(unsafe { raw::slice_bytes(self.string, start, len) })
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
@@ -1327,15 +1335,19 @@ impl<'self> StrSlice<'self> for &'self str {
|
|||||||
#[inline]
|
#[inline]
|
||||||
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
|
fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
|
||||||
-> CharSplitIterator<'self, Sep> {
|
-> CharSplitIterator<'self, Sep> {
|
||||||
let only_ascii = sep.only_ascii();
|
let iter = if sep.only_ascii() {
|
||||||
|
ByteOffset(self.as_bytes().iter().enumerate())
|
||||||
|
} else {
|
||||||
|
CharOffset(self.char_offset_iter())
|
||||||
|
};
|
||||||
CharSplitIterator {
|
CharSplitIterator {
|
||||||
|
iter: iter,
|
||||||
string: *self,
|
string: *self,
|
||||||
position: 0,
|
position: 0,
|
||||||
sep: sep,
|
sep: sep,
|
||||||
count: count,
|
count: count,
|
||||||
allow_trailing_empty: allow_trailing_empty,
|
allow_trailing_empty: allow_trailing_empty,
|
||||||
finished: false,
|
finished: false,
|
||||||
only_ascii: only_ascii
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user