Write multibyte case for str Chars iterator in-line

Thanks to comments from @alexcrichton, write the next/next_back function
bodies without nested functions, in a more top-to-bottom flow style.

Also improve comment style and justify the unsafe blocks with comments.
This commit is contained in:
root
2014-07-19 00:02:30 +02:00
parent bbb299ad98
commit 4592164869

View File

@@ -133,40 +133,35 @@ impl<'a> Iterator<char> for Chars<'a> {
#[inline] #[inline]
fn next(&mut self) -> Option<char> { fn next(&mut self) -> Option<char> {
// Decode UTF-8, using the valid UTF-8 invariant // Decode UTF-8, using the valid UTF-8 invariant
#[inline] let x = match self.iter.next() {
fn decode_multibyte<'a>(x: u8, it: &mut slice::Items<'a, u8>) -> char { None => return None,
// NOTE: Performance is very sensitive to the exact formulation here Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
Some(&next_byte) => next_byte,
};
// Multibyte case follows
// Decode from a byte combination out of: [[[x y] z] w] // Decode from a byte combination out of: [[[x y] z] w]
// NOTE: Performance is sensitive to the exact formulation here
let init = utf8_first_byte!(x, 2); let init = utf8_first_byte!(x, 2);
let y = unwrap_or_0(it.next()); let y = unwrap_or_0(self.iter.next());
let mut ch = utf8_acc_cont_byte!(init, y); let mut ch = utf8_acc_cont_byte!(init, y);
if x >= 0xE0 { if x >= 0xE0 {
/* [[x y z] w] case // [[x y z] w] case
* 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid */ // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
let z = unwrap_or_0(it.next()); let z = unwrap_or_0(self.iter.next());
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z); let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
ch = init << 12 | y_z; ch = init << 12 | y_z;
if x >= 0xF0 { if x >= 0xF0 {
/* [x y z w] case // [x y z w] case
* use only the lower 3 bits of `init` */ // use only the lower 3 bits of `init`
let w = unwrap_or_0(it.next()); let w = unwrap_or_0(self.iter.next());
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w); ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
} }
} }
unsafe {
mem::transmute(ch)
}
}
match self.iter.next() { // str invariant says `ch` is a valid Unicode Scalar Value
None => None, unsafe {
Some(&next_byte) => { Some(mem::transmute(ch))
if next_byte < 128 {
Some(next_byte as char)
} else {
Some(decode_multibyte(next_byte, &mut self.iter))
}
}
} }
} }
@@ -180,17 +175,22 @@ impl<'a> Iterator<char> for Chars<'a> {
impl<'a> DoubleEndedIterator<char> for Chars<'a> { impl<'a> DoubleEndedIterator<char> for Chars<'a> {
#[inline] #[inline]
fn next_back(&mut self) -> Option<char> { fn next_back(&mut self) -> Option<char> {
#[inline] let w = match self.iter.next_back() {
fn decode_multibyte_back<'a>(w: u8, it: &mut slice::Items<'a, u8>) -> char { None => return None,
Some(&back_byte) if back_byte < 128 => return Some(back_byte as char),
Some(&back_byte) => back_byte,
};
// Multibyte case follows
// Decode from a byte combination out of: [x [y [z w]]] // Decode from a byte combination out of: [x [y [z w]]]
let mut ch; let mut ch;
let z = unwrap_or_0(it.next_back()); let z = unwrap_or_0(self.iter.next_back());
ch = utf8_first_byte!(z, 2); ch = utf8_first_byte!(z, 2);
if utf8_is_cont_byte!(z) { if utf8_is_cont_byte!(z) {
let y = unwrap_or_0(it.next_back()); let y = unwrap_or_0(self.iter.next_back());
ch = utf8_first_byte!(y, 3); ch = utf8_first_byte!(y, 3);
if utf8_is_cont_byte!(y) { if utf8_is_cont_byte!(y) {
let x = unwrap_or_0(it.next_back()); let x = unwrap_or_0(self.iter.next_back());
ch = utf8_first_byte!(x, 4); ch = utf8_first_byte!(x, 4);
ch = utf8_acc_cont_byte!(ch, y); ch = utf8_acc_cont_byte!(ch, y);
} }
@@ -198,20 +198,9 @@ impl<'a> DoubleEndedIterator<char> for Chars<'a> {
} }
ch = utf8_acc_cont_byte!(ch, w); ch = utf8_acc_cont_byte!(ch, w);
// str invariant says `ch` is a valid Unicode Scalar Value
unsafe { unsafe {
mem::transmute(ch) Some(mem::transmute(ch))
}
}
match self.iter.next_back() {
None => None,
Some(&back_byte) => {
if back_byte < 128 {
Some(back_byte as char)
} else {
Some(decode_multibyte_back(back_byte, &mut self.iter))
}
}
} }
} }
} }