Write multibyte case for str Chars iterator in-line
Following review comments from @alexcrichton, rewrite the next/next_back function bodies without nested helper functions, in a more top-to-bottom flow. Also improve the comment style and justify the unsafe blocks with comments.
This commit is contained in:
@@ -133,40 +133,35 @@ impl<'a> Iterator<char> for Chars<'a> {
|
|||||||
#[inline]
|
#[inline]
|
||||||
fn next(&mut self) -> Option<char> {
|
fn next(&mut self) -> Option<char> {
|
||||||
// Decode UTF-8, using the valid UTF-8 invariant
|
// Decode UTF-8, using the valid UTF-8 invariant
|
||||||
#[inline]
|
let x = match self.iter.next() {
|
||||||
fn decode_multibyte<'a>(x: u8, it: &mut slice::Items<'a, u8>) -> char {
|
None => return None,
|
||||||
// NOTE: Performance is very sensitive to the exact formulation here
|
Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
|
||||||
// Decode from a byte combination out of: [[[x y] z] w]
|
Some(&next_byte) => next_byte,
|
||||||
let init = utf8_first_byte!(x, 2);
|
};
|
||||||
let y = unwrap_or_0(it.next());
|
|
||||||
let mut ch = utf8_acc_cont_byte!(init, y);
|
// Multibyte case follows
|
||||||
if x >= 0xE0 {
|
// Decode from a byte combination out of: [[[x y] z] w]
|
||||||
/* [[x y z] w] case
|
// NOTE: Performance is sensitive to the exact formulation here
|
||||||
* 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid */
|
let init = utf8_first_byte!(x, 2);
|
||||||
let z = unwrap_or_0(it.next());
|
let y = unwrap_or_0(self.iter.next());
|
||||||
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
|
let mut ch = utf8_acc_cont_byte!(init, y);
|
||||||
ch = init << 12 | y_z;
|
if x >= 0xE0 {
|
||||||
if x >= 0xF0 {
|
// [[x y z] w] case
|
||||||
/* [x y z w] case
|
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
|
||||||
* use only the lower 3 bits of `init` */
|
let z = unwrap_or_0(self.iter.next());
|
||||||
let w = unwrap_or_0(it.next());
|
let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
|
||||||
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
|
ch = init << 12 | y_z;
|
||||||
}
|
if x >= 0xF0 {
|
||||||
}
|
// [x y z w] case
|
||||||
unsafe {
|
// use only the lower 3 bits of `init`
|
||||||
mem::transmute(ch)
|
let w = unwrap_or_0(self.iter.next());
|
||||||
|
ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
match self.iter.next() {
|
// str invariant says `ch` is a valid Unicode Scalar Value
|
||||||
None => None,
|
unsafe {
|
||||||
Some(&next_byte) => {
|
Some(mem::transmute(ch))
|
||||||
if next_byte < 128 {
|
|
||||||
Some(next_byte as char)
|
|
||||||
} else {
|
|
||||||
Some(decode_multibyte(next_byte, &mut self.iter))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -180,38 +175,32 @@ impl<'a> Iterator<char> for Chars<'a> {
|
|||||||
impl<'a> DoubleEndedIterator<char> for Chars<'a> {
|
impl<'a> DoubleEndedIterator<char> for Chars<'a> {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn next_back(&mut self) -> Option<char> {
|
fn next_back(&mut self) -> Option<char> {
|
||||||
#[inline]
|
let w = match self.iter.next_back() {
|
||||||
fn decode_multibyte_back<'a>(w: u8, it: &mut slice::Items<'a, u8>) -> char {
|
None => return None,
|
||||||
// Decode from a byte combination out of: [x [y [z w]]]
|
Some(&back_byte) if back_byte < 128 => return Some(back_byte as char),
|
||||||
let mut ch;
|
Some(&back_byte) => back_byte,
|
||||||
let z = unwrap_or_0(it.next_back());
|
};
|
||||||
ch = utf8_first_byte!(z, 2);
|
|
||||||
if utf8_is_cont_byte!(z) {
|
|
||||||
let y = unwrap_or_0(it.next_back());
|
|
||||||
ch = utf8_first_byte!(y, 3);
|
|
||||||
if utf8_is_cont_byte!(y) {
|
|
||||||
let x = unwrap_or_0(it.next_back());
|
|
||||||
ch = utf8_first_byte!(x, 4);
|
|
||||||
ch = utf8_acc_cont_byte!(ch, y);
|
|
||||||
}
|
|
||||||
ch = utf8_acc_cont_byte!(ch, z);
|
|
||||||
}
|
|
||||||
ch = utf8_acc_cont_byte!(ch, w);
|
|
||||||
|
|
||||||
unsafe {
|
// Multibyte case follows
|
||||||
mem::transmute(ch)
|
// Decode from a byte combination out of: [x [y [z w]]]
|
||||||
|
let mut ch;
|
||||||
|
let z = unwrap_or_0(self.iter.next_back());
|
||||||
|
ch = utf8_first_byte!(z, 2);
|
||||||
|
if utf8_is_cont_byte!(z) {
|
||||||
|
let y = unwrap_or_0(self.iter.next_back());
|
||||||
|
ch = utf8_first_byte!(y, 3);
|
||||||
|
if utf8_is_cont_byte!(y) {
|
||||||
|
let x = unwrap_or_0(self.iter.next_back());
|
||||||
|
ch = utf8_first_byte!(x, 4);
|
||||||
|
ch = utf8_acc_cont_byte!(ch, y);
|
||||||
}
|
}
|
||||||
|
ch = utf8_acc_cont_byte!(ch, z);
|
||||||
}
|
}
|
||||||
|
ch = utf8_acc_cont_byte!(ch, w);
|
||||||
|
|
||||||
match self.iter.next_back() {
|
// str invariant says `ch` is a valid Unicode Scalar Value
|
||||||
None => None,
|
unsafe {
|
||||||
Some(&back_byte) => {
|
Some(mem::transmute(ch))
|
||||||
if back_byte < 128 {
|
|
||||||
Some(back_byte as char)
|
|
||||||
} else {
|
|
||||||
Some(decode_multibyte_back(back_byte, &mut self.iter))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user