Auto merge of #27233 - tbu-:pr_wtf8, r=alexcrichton
This commit is contained in:
@@ -84,10 +84,18 @@ pub fn from_u32(i: u32) -> Option<char> {
|
|||||||
if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
|
if (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF) {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(unsafe { transmute(i) })
|
Some(unsafe { from_u32_unchecked(i) })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a `u32` to a `char`, not checking whether it is a valid Unicode
|
||||||
|
/// codepoint.
|
||||||
|
#[inline]
|
||||||
|
#[unstable(feature = "char_from_unchecked", reason = "recently added API")]
|
||||||
|
pub unsafe fn from_u32_unchecked(i: u32) -> char {
|
||||||
|
transmute(i)
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts a number to the character representing it.
|
/// Converts a number to the character representing it.
|
||||||
///
|
///
|
||||||
/// # Return value
|
/// # Return value
|
||||||
@@ -115,12 +123,11 @@ pub fn from_digit(num: u32, radix: u32) -> Option<char> {
|
|||||||
panic!("from_digit: radix is too high (maximum 36)");
|
panic!("from_digit: radix is too high (maximum 36)");
|
||||||
}
|
}
|
||||||
if num < radix {
|
if num < radix {
|
||||||
unsafe {
|
let num = num as u8;
|
||||||
if num < 10 {
|
if num < 10 {
|
||||||
Some(transmute('0' as u32 + num))
|
Some((b'0' + num) as char)
|
||||||
} else {
|
} else {
|
||||||
Some(transmute('a' as u32 + num - 10))
|
Some((b'a' + num - 10) as char)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
@@ -318,16 +325,13 @@ impl Iterator for EscapeUnicode {
|
|||||||
Some('{')
|
Some('{')
|
||||||
}
|
}
|
||||||
EscapeUnicodeState::Value(offset) => {
|
EscapeUnicodeState::Value(offset) => {
|
||||||
let v = match ((self.c as i32) >> (offset * 4)) & 0xf {
|
let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap();
|
||||||
i @ 0 ... 9 => '0' as i32 + i,
|
|
||||||
i => 'a' as i32 + (i - 10)
|
|
||||||
};
|
|
||||||
if offset == 0 {
|
if offset == 0 {
|
||||||
self.state = EscapeUnicodeState::RightBrace;
|
self.state = EscapeUnicodeState::RightBrace;
|
||||||
} else {
|
} else {
|
||||||
self.state = EscapeUnicodeState::Value(offset - 1);
|
self.state = EscapeUnicodeState::Value(offset - 1);
|
||||||
}
|
}
|
||||||
Some(unsafe { transmute(v) })
|
Some(c)
|
||||||
}
|
}
|
||||||
EscapeUnicodeState::RightBrace => {
|
EscapeUnicodeState::RightBrace => {
|
||||||
self.state = EscapeUnicodeState::Done;
|
self.state = EscapeUnicodeState::Done;
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ use core::iter::Iterator;
|
|||||||
use tables::{derived_property, property, general_category, conversions, charwidth};
|
use tables::{derived_property, property, general_category, conversions, charwidth};
|
||||||
|
|
||||||
// stable reexports
|
// stable reexports
|
||||||
pub use core::char::{MAX, from_u32, from_digit, EscapeUnicode, EscapeDefault};
|
pub use core::char::{MAX, from_u32, from_u32_unchecked, from_digit, EscapeUnicode, EscapeDefault};
|
||||||
|
|
||||||
// unstable reexports
|
// unstable reexports
|
||||||
#[allow(deprecated)]
|
#[allow(deprecated)]
|
||||||
|
|||||||
@@ -210,6 +210,7 @@
|
|||||||
#![feature(borrow_state)]
|
#![feature(borrow_state)]
|
||||||
#![feature(box_raw)]
|
#![feature(box_raw)]
|
||||||
#![feature(box_syntax)]
|
#![feature(box_syntax)]
|
||||||
|
#![feature(char_from_unchecked)]
|
||||||
#![feature(char_internals)]
|
#![feature(char_internals)]
|
||||||
#![feature(clone_from_slice)]
|
#![feature(clone_from_slice)]
|
||||||
#![feature(collections)]
|
#![feature(collections)]
|
||||||
|
|||||||
@@ -32,17 +32,18 @@ use core::str::next_code_point;
|
|||||||
|
|
||||||
use ascii::*;
|
use ascii::*;
|
||||||
use borrow::Cow;
|
use borrow::Cow;
|
||||||
|
use char;
|
||||||
use cmp;
|
use cmp;
|
||||||
use fmt;
|
use fmt;
|
||||||
use hash::{Hash, Hasher};
|
use hash::{Hash, Hasher};
|
||||||
use iter::FromIterator;
|
use iter::FromIterator;
|
||||||
use mem;
|
use mem;
|
||||||
use ops;
|
use ops;
|
||||||
|
use rustc_unicode::str::{Utf16Item, utf16_items};
|
||||||
use slice;
|
use slice;
|
||||||
use str;
|
use str;
|
||||||
use string::String;
|
use string::String;
|
||||||
use sys_common::AsInner;
|
use sys_common::AsInner;
|
||||||
use rustc_unicode::str::{Utf16Item, utf16_items};
|
|
||||||
use vec::Vec;
|
use vec::Vec;
|
||||||
|
|
||||||
const UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD";
|
const UTF8_REPLACEMENT_CHARACTER: &'static [u8] = b"\xEF\xBF\xBD";
|
||||||
@@ -107,7 +108,7 @@ impl CodePoint {
|
|||||||
pub fn to_char(&self) -> Option<char> {
|
pub fn to_char(&self) -> Option<char> {
|
||||||
match self.value {
|
match self.value {
|
||||||
0xD800 ... 0xDFFF => None,
|
0xD800 ... 0xDFFF => None,
|
||||||
_ => Some(unsafe { mem::transmute(self.value) })
|
_ => Some(unsafe { char::from_u32_unchecked(self.value) })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -213,18 +214,16 @@ impl Wtf8Buf {
|
|||||||
// Attempt to not use an intermediate buffer by just pushing bytes
|
// Attempt to not use an intermediate buffer by just pushing bytes
|
||||||
// directly onto this string.
|
// directly onto this string.
|
||||||
let slice = slice::from_raw_parts_mut(
|
let slice = slice::from_raw_parts_mut(
|
||||||
self.bytes.as_mut_ptr().offset(cur_len as isize),
|
self.bytes.as_mut_ptr().offset(cur_len as isize), 4
|
||||||
4
|
|
||||||
);
|
);
|
||||||
let used = encode_utf8_raw(code_point.value, mem::transmute(slice))
|
let used = encode_utf8_raw(code_point.value, slice).unwrap();
|
||||||
.unwrap_or(0);
|
|
||||||
self.bytes.set_len(cur_len + used);
|
self.bytes.set_len(cur_len + used);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn as_slice(&self) -> &Wtf8 {
|
pub fn as_slice(&self) -> &Wtf8 {
|
||||||
unsafe { mem::transmute(&*self.bytes) }
|
unsafe { Wtf8::from_bytes_unchecked(&self.bytes) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reserves capacity for at least `additional` more bytes to be inserted
|
/// Reserves capacity for at least `additional` more bytes to be inserted
|
||||||
@@ -457,7 +456,16 @@ impl Wtf8 {
|
|||||||
/// Since WTF-8 is a superset of UTF-8, this always succeeds.
|
/// Since WTF-8 is a superset of UTF-8, this always succeeds.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn from_str(value: &str) -> &Wtf8 {
|
pub fn from_str(value: &str) -> &Wtf8 {
|
||||||
unsafe { mem::transmute(value.as_bytes()) }
|
unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a WTF-8 slice from a WTF-8 byte slice.
|
||||||
|
///
|
||||||
|
/// Since the byte slice is not checked for valid WTF-8, this function is
|
||||||
|
/// marked unsafe.
|
||||||
|
#[inline]
|
||||||
|
unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
|
||||||
|
mem::transmute(value)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the length, in WTF-8 bytes.
|
/// Returns the length, in WTF-8 bytes.
|
||||||
@@ -682,7 +690,7 @@ fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
|
|||||||
#[inline]
|
#[inline]
|
||||||
fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
|
fn decode_surrogate_pair(lead: u16, trail: u16) -> char {
|
||||||
let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32);
|
let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32);
|
||||||
unsafe { mem::transmute(code_point) }
|
unsafe { char::from_u32_unchecked(code_point) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Copied from core::str::StrPrelude::is_char_boundary
|
/// Copied from core::str::StrPrelude::is_char_boundary
|
||||||
@@ -699,7 +707,7 @@ pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
|
pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
|
||||||
// memory layout of an &[u8] and &Wtf8 are the same
|
// memory layout of an &[u8] and &Wtf8 are the same
|
||||||
mem::transmute(slice::from_raw_parts(
|
Wtf8::from_bytes_unchecked(slice::from_raw_parts(
|
||||||
s.bytes.as_ptr().offset(begin as isize),
|
s.bytes.as_ptr().offset(begin as isize),
|
||||||
end - begin
|
end - begin
|
||||||
))
|
))
|
||||||
@@ -821,7 +829,6 @@ mod tests {
|
|||||||
use prelude::v1::*;
|
use prelude::v1::*;
|
||||||
use borrow::Cow;
|
use borrow::Cow;
|
||||||
use super::*;
|
use super::*;
|
||||||
use mem::transmute;
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn code_point_from_u32() {
|
fn code_point_from_u32() {
|
||||||
@@ -962,7 +969,7 @@ mod tests {
|
|||||||
string.push_wtf8(Wtf8::from_str(" 💩"));
|
string.push_wtf8(Wtf8::from_str(" 💩"));
|
||||||
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
|
assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9");
|
||||||
|
|
||||||
fn w(value: &[u8]) -> &Wtf8 { unsafe { transmute(value) } }
|
fn w(v: &[u8]) -> &Wtf8 { unsafe { Wtf8::from_bytes_unchecked(v) } }
|
||||||
|
|
||||||
let mut string = Wtf8Buf::new();
|
let mut string = Wtf8Buf::new();
|
||||||
string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
|
string.push_wtf8(w(b"\xED\xA0\xBD")); // lead
|
||||||
|
|||||||
Reference in New Issue
Block a user