interpret: better control over whether we read data with provenance, and implicit provenance stripping where possible

This commit is contained in:
Ralf Jung
2022-06-02 20:30:29 -04:00
parent 5e6bb83268
commit 47d11a8483
22 changed files with 502 additions and 411 deletions

View File

@@ -264,9 +264,18 @@ impl<Tag, Extra> Allocation<Tag, Extra> {
/// Byte accessors.
impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
/// The last argument controls whether we error out when there are uninitialized
/// or pointer bytes. You should never call this, call `get_bytes` or
/// `get_bytes_with_uninit_and_ptr` instead,
/// This is the entirely abstraction-violating way to just grab the raw bytes without
/// caring about relocations. It just deduplicates some code between `read_scalar`
/// and `get_bytes_internal`.
fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
}
/// The last argument controls whether we error out when there are uninitialized or pointer
/// bytes. However, we *always* error when there are relocations overlapping the edges of the
/// range.
///
/// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
///
/// This function also guarantees that the resulting pointer will remain stable
/// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
@@ -287,7 +296,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
self.check_relocation_edges(cx, range)?;
}
Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
Ok(self.get_bytes_even_more_internal(range))
}
/// Checks that these bytes are initialized and not pointer bytes, and then return them
@@ -373,6 +382,9 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
/// Reads a *non-ZST* scalar.
///
/// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
/// supports that) provenance is entirely ignored.
///
/// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
/// for ZSTness anyway due to integer pointers being valid for ZSTs.
///
@@ -382,35 +394,47 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
&self,
cx: &impl HasDataLayout,
range: AllocRange,
read_provenance: bool,
) -> AllocResult<ScalarMaybeUninit<Tag>> {
// `get_bytes_with_uninit_and_ptr` tests relocation edges.
// We deliberately error when loading data that partially has provenance, or partially
// initialized data (that's the check below), into a scalar. The LLVM semantics of this are
// unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
// further discussion.
let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
// Uninit check happens *after* we established that the alignment is correct.
// We must not return `Ok()` for unaligned pointers!
if read_provenance {
assert_eq!(range.size, cx.data_layout().pointer_size);
}
// First and foremost, if anything is uninit, bail.
if self.is_init(range).is_err() {
// This inflates uninitialized bytes to the entire scalar, even if only a few
// bytes are uninitialized.
return Ok(ScalarMaybeUninit::Uninit);
}
// Now we do the actual reading.
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
// See if we got a pointer.
if range.size != cx.data_layout().pointer_size {
// Not a pointer.
// *Now*, we better make sure that the inside is free of relocations too.
self.check_relocations(cx, range)?;
} else {
// Maybe a pointer.
if let Some(&prov) = self.relocations.get(&range.start) {
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}
// If we are doing a pointer read, and there is a relocation exactly where we
// are reading, then we can put data and relocation back together and return that.
if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
// We already checked init and relocations, so we can use this function.
let bytes = self.get_bytes_even_more_internal(range);
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}
// We don't. Just return the bits.
// If we are *not* reading a pointer, and we can just ignore relocations,
// then do exactly that.
if !read_provenance && Tag::OFFSET_IS_ADDR {
// We just strip provenance.
let bytes = self.get_bytes_even_more_internal(range);
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
}
// It's complicated. Better make sure there is no provenance anywhere.
// FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
// `read_pointer` is true and we ideally would distinguish the following two cases:
// - The entire `range` is covered by 2 relocations for the same provenance.
// Then we should return a pointer with that provenance.
// - The range has inhomogeneous provenance. Then we should return just the
// underlying bits.
let bytes = self.get_bytes(cx, range)?;
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
}
@@ -513,8 +537,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
let start = range.start;
let end = range.end();
// We need to handle clearing the relocations from parts of a pointer. See
// <https://github.com/rust-lang/rust/issues/87184> for details.
// We need to handle clearing the relocations from parts of a pointer.
// FIXME: Miri should preserve partial relocations; see
// https://github.com/rust-lang/miri/issues/2181.
if first < start {
if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
return Err(AllocError::PartialPointerOverwrite(first));

View File

@@ -428,7 +428,7 @@ pub enum UnsupportedOpInfo {
/// Encountered a pointer where we needed raw bytes.
ReadPointerAsBytes,
/// Overwriting parts of a pointer; the resulting state cannot be represented in our
/// `Allocation` data structure.
/// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
PartialPointerOverwrite(Pointer<AllocId>),
//
// The variants below are only reachable from CTFE/const prop, miri will never emit them.