auto merge of #9108 : blake2-ppc/rust/hazards-on-overflow, r=alexcrichton

Fix uint overflow bugs in std::{at_vec, vec, str}

Closes #8742

Fix issue #8742, which summarized is: unsafe code in vec and str did assume
that a reservation for `X + Y` elements always succeeded, and didn't overflow.

Introduce the method `Vec::reserve_additional(n)` to make it easy to check for
overflow in `Vec::push` and `Vec::push_all`.

In std::str, simplify and remove a lot of the unsafe code and use `push_str`
instead. With improvements to `.push_str` and the new function
`vec::bytes::push_bytes`, it looks like this change has either no or positive
impact on performance.

I believe there are many places still where `v.reserve(A + B)` still can overflow.
This by itself is not an issue unless followed by (unsafe) code that steps aside
boundary checks.
This commit is contained in:
bors
2013-09-16 19:35:50 -07:00
5 changed files with 152 additions and 118 deletions

View File

@@ -230,13 +230,16 @@ pub mod raw {
// Implementation detail. Shouldn't be public
#[allow(missing_doc)]
pub fn reserve_raw(ty: *TyDesc, ptr: *mut *mut Box<Vec<()>>, n: uint) {
// check for `uint` overflow
unsafe {
let size_in_bytes = n * (*ty).size;
if size_in_bytes > (**ptr).data.alloc {
let total_size = size_in_bytes + sys::size_of::<Vec<()>>();
if n > (**ptr).data.alloc / (*ty).size {
let alloc = n * (*ty).size;
let total_size = alloc + sys::size_of::<Vec<()>>();
if alloc / (*ty).size != n || total_size < alloc {
fail!("vector size is too large: %u", n);
}
(*ptr) = local_realloc(*ptr as *(), total_size) as *mut Box<Vec<()>>;
(**ptr).data.alloc = size_in_bytes;
(**ptr).data.alloc = alloc;
}
}

View File

@@ -101,7 +101,17 @@ pub fn next_power_of_two(n: uint) -> uint {
let mut tmp: uint = n - 1u;
let mut shift: uint = 1u;
while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
return tmp + 1u;
tmp + 1u
}
/// Returns the smallest power of 2 greater than or equal to `n`
#[inline]
pub fn next_power_of_two_opt(n: uint) -> Option<uint> {
let halfbits: uint = sys::size_of::<uint>() * 4u;
let mut tmp: uint = n - 1u;
let mut shift: uint = 1u;
while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; }
tmp.checked_add(&1)
}
#[cfg(target_word_size = "32")]

View File

@@ -303,7 +303,7 @@ impl<T: Reader> ReaderUtil for T {
let start_len = buf.len();
let mut total_read = 0;
buf.reserve_at_least(start_len + len);
buf.reserve_additional(len);
vec::raw::set_len(buf, start_len + len);
do (|| {

View File

@@ -22,8 +22,7 @@ use char;
use char::Char;
use clone::{Clone, DeepClone};
use container::{Container, Mutable};
use num::Times;
use iter::{Iterator, FromIterator, Extendable};
use iter::{Iterator, FromIterator, Extendable, range};
use iter::{Filter, AdditiveIterator, Map};
use iter::{Invert, DoubleEndedIterator, ExactSize};
use libc;
@@ -33,7 +32,6 @@ use ptr;
use ptr::RawPtr;
use to_str::ToStr;
use uint;
use unstable::raw::{Repr, Slice};
use vec;
use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
use default::Default;
@@ -185,23 +183,15 @@ impl<'self, S: Str> StrVector for &'self [S] {
fn concat(&self) -> ~str {
if self.is_empty() { return ~""; }
// `len` calculation may overflow but push_str but will check boundaries
let len = self.iter().map(|s| s.as_slice().len()).sum();
let mut s = with_capacity(len);
let mut result = with_capacity(len);
unsafe {
do s.as_mut_buf |buf, _| {
let mut buf = buf;
for ss in self.iter() {
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
ptr::copy_memory(buf, ssbuf, sslen);
buf = buf.offset(sslen as int);
}
}
}
raw::set_len(&mut s, len);
for s in self.iter() {
result.push_str(s.as_slice())
}
s
result
}
/// Concatenate a vector of strings, placing a given separator between each.
@@ -212,34 +202,21 @@ impl<'self, S: Str> StrVector for &'self [S] {
if sep.is_empty() { return self.concat(); }
// this is wrong without the guarantee that `self` is non-empty
// `len` calculation may overflow but push_str but will check boundaries
let len = sep.len() * (self.len() - 1)
+ self.iter().map(|s| s.as_slice().len()).sum();
let mut s = ~"";
let mut result = with_capacity(len);
let mut first = true;
s.reserve(len);
unsafe {
do s.as_mut_buf |buf, _| {
do sep.as_imm_buf |sepbuf, seplen| {
let mut buf = buf;
for ss in self.iter() {
do ss.as_slice().as_imm_buf |ssbuf, sslen| {
if first {
first = false;
} else {
ptr::copy_memory(buf, sepbuf, seplen);
buf = buf.offset(seplen as int);
}
ptr::copy_memory(buf, ssbuf, sslen);
buf = buf.offset(sslen as int);
}
}
}
for s in self.iter() {
if first {
first = false;
} else {
result.push_str(sep);
}
raw::set_len(&mut s, len);
result.push_str(s.as_slice());
}
s
result
}
}
@@ -961,7 +938,6 @@ static TAG_CONT_U8: u8 = 128u8;
/// Unsafe operations
pub mod raw {
use option::Some;
use cast;
use libc;
use ptr;
@@ -1064,21 +1040,22 @@ pub mod raw {
}
}
/// Appends a byte to a string. (Not UTF-8 safe).
/// Appends a byte to a string.
/// The caller must preserve the valid UTF-8 property.
#[inline]
pub unsafe fn push_byte(s: &mut ~str, b: u8) {
let v: &mut ~[u8] = cast::transmute(s);
v.push(b);
as_owned_vec(s).push(b)
}
/// Appends a vector of bytes to a string. (Not UTF-8 safe).
unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
let new_len = s.len() + bytes.len();
s.reserve_at_least(new_len);
for byte in bytes.iter() { push_byte(&mut *s, *byte); }
/// Appends a vector of bytes to a string.
/// The caller must preserve the valid UTF-8 property.
#[inline]
pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
vec::bytes::push_bytes(as_owned_vec(s), bytes);
}
/// Removes the last byte from a string and returns it. (Not UTF-8 safe).
/// Removes the last byte from a string and returns it.
/// The caller must preserve the valid UTF-8 property.
pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
let len = s.len();
assert!((len > 0u));
@@ -1087,7 +1064,8 @@ pub mod raw {
return b;
}
/// Removes the first byte from a string and returns it. (Not UTF-8 safe).
/// Removes the first byte from a string and returns it.
/// The caller must preserve the valid UTF-8 property.
pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
let len = s.len();
assert!((len > 0u));
@@ -1096,6 +1074,13 @@ pub mod raw {
return b;
}
/// Access the str in its vector representation.
/// The caller must preserve the valid UTF-8 property when modifying.
#[inline]
pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
cast::transmute(s)
}
/// Sets the length of a string
///
/// This will explicitly set the size of the string, without actually
@@ -1103,8 +1088,7 @@ pub mod raw {
/// the string is actually the specified size.
#[inline]
pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
let v: &mut ~[u8] = cast::transmute(s);
vec::raw::set_len(v, new_len)
vec::raw::set_len(as_owned_vec(s), new_len)
}
/// Sets the length of a string
@@ -2061,22 +2045,11 @@ impl<'self> StrSlice<'self> for &'self str {
/// Given a string, make a new string with repeated copies of it.
fn repeat(&self, nn: uint) -> ~str {
do self.as_imm_buf |buf, len| {
let mut ret = with_capacity(nn * len);
unsafe {
do ret.as_mut_buf |rbuf, _len| {
let mut rbuf = rbuf;
do nn.times {
ptr::copy_memory(rbuf, buf, len);
rbuf = rbuf.offset(len as int);
}
}
raw::set_len(&mut ret, nn * len);
}
ret
let mut ret = with_capacity(nn * self.len());
for _ in range(0, nn) {
ret.push_str(*self);
}
ret
}
/// Retrieves the first character from a string slice and returns
@@ -2199,36 +2172,16 @@ impl OwnedStr for ~str {
/// Appends a string slice to the back of a string, without overallocating
#[inline]
fn push_str_no_overallocate(&mut self, rhs: &str) {
unsafe {
let llen = self.len();
let rlen = rhs.len();
self.reserve(llen + rlen);
do self.as_imm_buf |lbuf, _llen| {
do rhs.as_imm_buf |rbuf, _rlen| {
let dst = ptr::offset(lbuf, llen as int);
let dst = cast::transmute_mut_unsafe(dst);
ptr::copy_memory(dst, rbuf, rlen);
}
}
raw::set_len(self, llen + rlen);
}
let new_cap = self.len() + rhs.len();
self.reserve(new_cap);
self.push_str(rhs);
}
/// Appends a string slice to the back of a string
#[inline]
fn push_str(&mut self, rhs: &str) {
unsafe {
let llen = self.len();
let rlen = rhs.len();
self.reserve_at_least(llen + rlen);
do self.as_imm_buf |lbuf, _llen| {
do rhs.as_imm_buf |rbuf, _rlen| {
let dst = ptr::offset(lbuf, llen as int);
let dst = cast::transmute_mut_unsafe(dst);
ptr::copy_memory(dst, rbuf, rlen);
}
}
raw::set_len(self, llen + rlen);
raw::push_bytes(self, rhs.as_bytes());
}
}
@@ -2236,17 +2189,18 @@ impl OwnedStr for ~str {
#[inline]
fn push_char(&mut self, c: char) {
let cur_len = self.len();
self.reserve_at_least(cur_len + 4); // may use up to 4 bytes
// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
// may use up to 4 bytes.
unsafe {
let v = self.repr();
let len = c.encode_utf8(cast::transmute(Slice {
data: ((&(*v).data) as *u8).offset(cur_len as int),
len: 4,
}));
raw::set_len(self, cur_len + len);
raw::as_owned_vec(self).reserve_additional(4);
// Attempt to not use an intermediate buffer by just pushing bytes
// directly onto this string.
let used = do self.as_mut_buf |buf, _| {
do vec::raw::mut_buf_as_slice(buf.offset(cur_len as int), 4) |slc| {
c.encode_utf8(slc)
}
};
raw::set_len(self, cur_len + used);
}
}
@@ -2306,8 +2260,7 @@ impl OwnedStr for ~str {
#[inline]
fn reserve(&mut self, n: uint) {
unsafe {
let v: &mut ~[u8] = cast::transmute(self);
(*v).reserve(n);
raw::as_owned_vec(self).reserve(n)
}
}
@@ -2329,7 +2282,7 @@ impl OwnedStr for ~str {
/// * n - The number of bytes to reserve space for
#[inline]
fn reserve_at_least(&mut self, n: uint) {
self.reserve(uint::next_power_of_two(n))
self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n))
}
/// Returns the number of single-byte characters the string can hold without
@@ -2359,8 +2312,9 @@ impl OwnedStr for ~str {
#[inline]
fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
let v: &mut ~[u8] = unsafe { cast::transmute(self) };
v.as_mut_buf(f)
unsafe {
raw::as_owned_vec(self).as_mut_buf(f)
}
}
}
@@ -3912,4 +3866,23 @@ mod bench {
with_capacity(100);
}
}
#[bench]
fn bench_push_str(bh: &mut BenchHarness) {
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
do bh.iter {
let mut r = ~"";
r.push_str(s);
}
}
#[bench]
fn bench_connect(bh: &mut BenchHarness) {
let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
let sep = "";
let v = [s, s, s, s, s, s, s, s, s, s];
do bh.iter {
assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
}
}
}

View File

@@ -1245,6 +1245,7 @@ pub trait OwnedVector<T> {
fn reserve(&mut self, n: uint);
fn reserve_at_least(&mut self, n: uint);
fn reserve_additional(&mut self, n: uint);
fn capacity(&self) -> uint;
fn shrink_to_fit(&mut self);
@@ -1300,6 +1301,11 @@ impl<T> OwnedVector<T> for ~[T] {
* # Arguments
*
* * n - The number of elements to reserve space for
*
* # Failure
*
* This method always succeeds in reserving space for `n` elements, or it does
* not return.
*/
fn reserve(&mut self, n: uint) {
// Only make the (slow) call into the runtime if we have to
@@ -1340,7 +1346,26 @@ impl<T> OwnedVector<T> for ~[T] {
*/
#[inline]
fn reserve_at_least(&mut self, n: uint) {
self.reserve(uint::next_power_of_two(n));
self.reserve(uint::next_power_of_two_opt(n).unwrap_or(n));
}
/**
* Reserves capacity for at least `n` additional elements in the given vector.
*
* # Failure
*
* Fails if the new required capacity overflows uint.
*
* May also fail if `reserve` fails.
*/
#[inline]
fn reserve_additional(&mut self, n: uint) {
if self.capacity() - self.len() < n {
match self.len().checked_add(&n) {
None => fail!("vec::reserve_additional: `uint` overflow"),
Some(new_cap) => self.reserve_at_least(new_cap)
}
}
}
/// Returns the number of elements the vector can hold without reallocating.
@@ -1376,8 +1401,7 @@ impl<T> OwnedVector<T> for ~[T] {
let repr: **Box<Vec<()>> = cast::transmute(&mut *self);
let fill = (**repr).data.fill;
if (**repr).data.alloc <= fill {
let new_len = self.len() + 1;
self.reserve_at_least(new_len);
self.reserve_additional(1);
}
push_fast(self, t);
@@ -1385,8 +1409,7 @@ impl<T> OwnedVector<T> for ~[T] {
let repr: **Vec<()> = cast::transmute(&mut *self);
let fill = (**repr).fill;
if (**repr).alloc <= fill {
let new_len = self.len() + 1;
self.reserve_at_least(new_len);
self.reserve_additional(1);
}
push_fast(self, t);
@@ -1432,7 +1455,7 @@ impl<T> OwnedVector<T> for ~[T] {
let self_len = self.len();
let rhs_len = rhs.len();
let new_len = self_len + rhs_len;
self.reserve_at_least(new_len);
self.reserve_additional(rhs.len());
unsafe { // Note: infallible.
let self_p = vec::raw::to_mut_ptr(*self);
let rhs_p = vec::raw::to_ptr(rhs);
@@ -2221,6 +2244,23 @@ pub mod bytes {
// Bound checks are done at vec::raw::copy_memory.
unsafe { vec::raw::copy_memory(dst, src, count) }
}
/**
* Allocate space in `dst` and append the data in `src`.
*/
#[inline]
pub fn push_bytes(dst: &mut ~[u8], src: &[u8]) {
let old_len = dst.len();
dst.reserve_additional(src.len());
unsafe {
do dst.as_mut_buf |p_dst, len_dst| {
do src.as_imm_buf |p_src, len_src| {
ptr::copy_memory(p_dst.offset(len_dst as int), p_src, len_src)
}
}
vec::raw::set_len(dst, old_len + src.len());
}
}
}
impl<A: Clone> Clone for ~[A] {
@@ -3619,6 +3659,14 @@ mod tests {
v.push(2);
}
#[test]
#[should_fail]
fn test_overflow_does_not_cause_segfault_managed() {
let mut v = ~[@1];
v.reserve(-1);
v.push(@2);
}
#[test]
fn test_mut_split() {
let mut values = [1u8,2,3,4,5];