Refactor low-level UTF-16 decoding.

* Rename `utf16_items` to `decode_utf16`. "Items" is meaningless.
* Move it to `rustc_unicode::char`, exposed in `std::char`.
* Generalize it to any `u16` iterable, not just `&[u16]`.
* Make it yield `Result` instead of a custom `Utf16Item` enum that was isomorphic to `Result`. This enable using the `FromIterator for Result` impl.
* Add a `REPLACEMENT_CHARACTER` constant.
* Document how `result.unwrap_or(REPLACEMENT_CHARACTER)` replaces `Utf16Item::to_char_lossy`.
This commit is contained in:
Simon Sapin
2015-08-13 18:39:46 +02:00
parent c408b78633
commit 6174b8d726
10 changed files with 164 additions and 61 deletions

View File

@@ -211,3 +211,12 @@ fn test_len_utf16() {
assert!('\u{a66e}'.len_utf16() == 1);
assert!('\u{1f4a9}'.len_utf16() == 2);
}
#[test]
fn test_decode_utf16() {
fn check(s: &[u16], expected: &[Result<char, u16>]) {
assert_eq!(::std::char::decode_utf16(s.iter().cloned()).collect::<Vec<_>>(), expected);
}
check(&[0xD800, 0x41, 0x42], &[Err(0xD800), Ok('A'), Ok('B')]);
check(&[0xD800, 0], &[Err(0xD800), Ok('\0')]);
}

View File

@@ -19,6 +19,7 @@
#![feature(float_from_str_radix)]
#![feature(flt2dec)]
#![feature(dec2flt)]
#![feature(decode_utf16)]
#![feature(fmt_radix)]
#![feature(iter_arith)]
#![feature(iter_arith)]