rt: Inline a bunch of stack switching code

src/rt/rust_task.cpp
@@ -13,53 +13,6 @@
 #include "globals.h"
 #include "rust_upcall.h"
 
-// The amount of extra space at the end of each stack segment, available
-// to the rt, compiler and dynamic linker for running small functions
-// FIXME: We want this to be 128 but need to slim the red zone calls down
-#define RZ_LINUX_32 (1024*2)
-#define RZ_LINUX_64 (1024*2)
-#define RZ_MAC_32 (1024*20)
-#define RZ_MAC_64 (1024*20)
-#define RZ_WIN_32 (1024*20)
-#define RZ_BSD_32 (1024*20)
-#define RZ_BSD_64 (1024*20)
-
-#ifdef __linux__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_LINUX_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_LINUX_64
-#endif
-#endif
-#ifdef __APPLE__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_MAC_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_MAC_64
-#endif
-#endif
-#ifdef __WIN32__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_WIN_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_WIN_64
-#endif
-#endif
-#ifdef __FreeBSD__
-#ifdef __i386__
-#define RED_ZONE_SIZE RZ_BSD_32
-#endif
-#ifdef __x86_64__
-#define RED_ZONE_SIZE RZ_BSD_64
-#endif
-#endif
-
-extern "C" CDECL void
-record_sp(void *limit);
-
 // Tasks
 rust_task::rust_task(rust_task_thread *thread, rust_task_state state,
                      rust_task *spawner, const char *name,
@@ -494,14 +447,6 @@ rust_task::get_next_stack_size(size_t min, size_t current, size_t requested) {
     return sz;
 }
 
-// The amount of stack in a segment available to Rust code
-static size_t
-user_stack_size(stk_seg *stk) {
-    return (size_t)(stk->end
-                    - (uintptr_t)&stk->data[0]
-                    - RED_ZONE_SIZE);
-}
-
 void
 rust_task::free_stack(stk_seg *stk) {
     LOGPTR(thread, "freeing stk segment", (uintptr_t)stk);
@@ -509,37 +454,11 @@ rust_task::free_stack(stk_seg *stk) {
     destroy_stack(&local_region, stk);
 }
 
-struct new_stack_args {
-    rust_task *task;
-    size_t requested_sz;
-};
-
 void
 new_stack_slow(new_stack_args *args) {
     args->task->new_stack(args->requested_sz);
 }
 
-// NB: This runs on the Rust stack
-// This is the new stack fast path, in which we
-// reuse the next cached stack segment
-void
-rust_task::new_stack_fast(size_t requested_sz) {
-    // The minimum stack size, in bytes, of a Rust stack, excluding red zone
-    size_t min_sz = thread->min_stack_size;
-
-    // Try to reuse an existing stack segment
-    if (stk != NULL && stk->next != NULL) {
-        size_t next_sz = user_stack_size(stk->next);
-        if (min_sz <= next_sz && requested_sz <= next_sz) {
-            stk = stk->next;
-            return;
-        }
-    }
-
-    new_stack_args args = {this, requested_sz};
-    call_on_c_stack(&args, (void*)new_stack_slow);
-}
-
 void
 rust_task::new_stack(size_t requested_sz) {
     LOG(this, mem, "creating new stack for task %" PRIxPTR, this);
@@ -596,63 +515,6 @@ rust_task::new_stack(size_t requested_sz) {
     total_stack_sz += user_stack_size(new_stk);
 }
 
-void *
-rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
-    stk_seg *maybe_next_stack = NULL;
-    if (stk != NULL) {
-        maybe_next_stack = stk->prev;
-    }
-
-    new_stack_fast(stk_sz + args_sz);
-    A(thread, stk->end - (uintptr_t)stk->data >= stk_sz + args_sz,
-      "Did not receive enough stack");
-    uint8_t *new_sp = (uint8_t*)stk->end;
-    // Push the function arguments to the new stack
-    new_sp = align_down(new_sp - args_sz);
-
-    // When reusing a stack segment we need to tell valgrind that this area of
-    // memory is accessible before writing to it, because the act of popping
-    // the stack previously made all of the stack inaccessible.
-    if (maybe_next_stack == stk) {
-        // I don't know exactly where the region ends that valgrind needs us
-        // to mark accessible. On x86_64 these extra bytes aren't needed, but
-        // on i386 we get errors without.
-        int fudge_bytes = 16;
-        reuse_valgrind_stack(stk, new_sp - fudge_bytes);
-    }
-
-    memcpy(new_sp, args_addr, args_sz);
-    record_stack_limit();
-    return new_sp;
-}
-
-// NB: This runs on the Rust stack
-void
-rust_task::prev_stack() {
-    // We're not going to actually delete anything now because that would
-    // require switching to the C stack and be costly. Instead we'll just move
-    // up the linked list and clean up later, either in new_stack or after our
-    // turn ends on the scheduler.
-    stk = stk->prev;
-    record_stack_limit();
-}
-
-void
-rust_task::record_stack_limit() {
-    I(thread, stk);
-    // The function prolog compares the amount of stack needed to the end of
-    // the stack. As an optimization, when the frame size is less than 256
-    // bytes, it will simply compare %esp to the stack limit instead of
-    // subtracting the frame size. As a result we need our stack limit to
-    // account for those 256 bytes.
-    const unsigned LIMIT_OFFSET = 256;
-    A(thread,
-      (uintptr_t)stk->end - RED_ZONE_SIZE
-      - (uintptr_t)stk->data >= LIMIT_OFFSET,
-      "Stack size must be greater than LIMIT_OFFSET");
-    record_sp(stk->data + LIMIT_OFFSET + RED_ZONE_SIZE);
-}
-
 void
 rust_task::cleanup_after_turn() {
     // Delete any spare stack segments that were left
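
Everything deleted from rust_task.cpp above reappears in rust_task.h below as inline definitions; only new_stack_slow keeps an out-of-line body, because it is the slow path reached through call_on_c_stack. A minimal sketch of that fast/slow split, with hypothetical names (seg, task_sketch, grow, grow_fast) standing in for the runtime's actual types:

    #include <cstddef>

    // Hypothetical stand-ins for the runtime's stk_seg / rust_task types.
    struct seg { seg *next; size_t usable; };

    struct task_sketch {
        seg *cur;

        // Slow path: the real runtime runs this on the big C stack.
        void grow(size_t want) { (void)want; /* allocate a fresh segment; elided */ }

        // Fast path: cheap enough to inline and keep on the Rust stack.
        void grow_fast(size_t want, size_t min) {
            if (cur != NULL && cur->next != NULL &&
                min <= cur->next->usable && want <= cur->next->usable) {
                cur = cur->next;   // reuse the cached next segment
                return;
            }
            grow(want);            // otherwise fall back to the slow path
        }
    };

Keeping the fast path inline means a growth check that hits the cached segment costs a few compares and a pointer move, with no switch to the C stack.
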
src/rt/rust_task.h
@@ -17,6 +17,50 @@
 #include "rust_stack.h"
 #include "rust_port_selector.h"
 
+// The amount of extra space at the end of each stack segment, available
+// to the rt, compiler and dynamic linker for running small functions
+// FIXME: We want this to be 128 but need to slim the red zone calls down
+#define RZ_LINUX_32 (1024*2)
+#define RZ_LINUX_64 (1024*2)
+#define RZ_MAC_32 (1024*20)
+#define RZ_MAC_64 (1024*20)
+#define RZ_WIN_32 (1024*20)
+#define RZ_BSD_32 (1024*20)
+#define RZ_BSD_64 (1024*20)
+
+#ifdef __linux__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_LINUX_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_LINUX_64
+#endif
+#endif
+#ifdef __APPLE__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_MAC_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_MAC_64
+#endif
+#endif
+#ifdef __WIN32__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_WIN_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_WIN_64
+#endif
+#endif
+#ifdef __FreeBSD__
+#ifdef __i386__
+#define RED_ZONE_SIZE RZ_BSD_32
+#endif
+#ifdef __x86_64__
+#define RED_ZONE_SIZE RZ_BSD_64
+#endif
+#endif
+
 struct rust_box;
 
 struct frame_glue_fns {
@@ -303,6 +347,105 @@ rust_task::return_c_stack() {
     next_c_sp = 0;
 }
 
+// NB: This runs on the Rust stack
+inline void *
+rust_task::next_stack(size_t stk_sz, void *args_addr, size_t args_sz) {
+    stk_seg *maybe_next_stack = NULL;
+    if (stk != NULL) {
+        maybe_next_stack = stk->prev;
+    }
+
+    new_stack_fast(stk_sz + args_sz);
+    A(thread, stk->end - (uintptr_t)stk->data >= stk_sz + args_sz,
+      "Did not receive enough stack");
+    uint8_t *new_sp = (uint8_t*)stk->end;
+    // Push the function arguments to the new stack
+    new_sp = align_down(new_sp - args_sz);
+
+    // When reusing a stack segment we need to tell valgrind that this area of
+    // memory is accessible before writing to it, because the act of popping
+    // the stack previously made all of the stack inaccessible.
+    if (maybe_next_stack == stk) {
+        // I don't know exactly where the region ends that valgrind needs us
+        // to mark accessible. On x86_64 these extra bytes aren't needed, but
+        // on i386 we get errors without.
+        int fudge_bytes = 16;
+        reuse_valgrind_stack(stk, new_sp - fudge_bytes);
+    }
+
+    memcpy(new_sp, args_addr, args_sz);
+    record_stack_limit();
+    return new_sp;
+}
+
+// The amount of stack in a segment available to Rust code
+inline size_t
+user_stack_size(stk_seg *stk) {
+    return (size_t)(stk->end
+                    - (uintptr_t)&stk->data[0]
+                    - RED_ZONE_SIZE);
+}
+
+struct new_stack_args {
+    rust_task *task;
+    size_t requested_sz;
+};
+
+void
+new_stack_slow(new_stack_args *args);
+
+// NB: This runs on the Rust stack
+// This is the new stack fast path, in which we
+// reuse the next cached stack segment
+inline void
+rust_task::new_stack_fast(size_t requested_sz) {
+    // The minimum stack size, in bytes, of a Rust stack, excluding red zone
+    size_t min_sz = thread->min_stack_size;
+
+    // Try to reuse an existing stack segment
+    if (stk != NULL && stk->next != NULL) {
+        size_t next_sz = user_stack_size(stk->next);
+        if (min_sz <= next_sz && requested_sz <= next_sz) {
+            stk = stk->next;
+            return;
+        }
+    }
+
+    new_stack_args args = {this, requested_sz};
+    call_on_c_stack(&args, (void*)new_stack_slow);
+}
+
+// NB: This runs on the Rust stack
+inline void
+rust_task::prev_stack() {
+    // We're not going to actually delete anything now because that would
+    // require switching to the C stack and be costly. Instead we'll just move
+    // up the linked list and clean up later, either in new_stack or after our
+    // turn ends on the scheduler.
+    stk = stk->prev;
+    record_stack_limit();
+}
+
+extern "C" CDECL void
+record_sp(void *limit);
+
+inline void
+rust_task::record_stack_limit() {
+    I(thread, stk);
+    // The function prolog compares the amount of stack needed to the end of
+    // the stack. As an optimization, when the frame size is less than 256
+    // bytes, it will simply compare %esp to the stack limit instead of
+    // subtracting the frame size. As a result we need our stack limit to
+    // account for those 256 bytes.
+    const unsigned LIMIT_OFFSET = 256;
+    A(thread,
+      (uintptr_t)stk->end - RED_ZONE_SIZE
+      - (uintptr_t)stk->data >= LIMIT_OFFSET,
+      "Stack size must be greater than LIMIT_OFFSET");
+    record_sp(stk->data + LIMIT_OFFSET + RED_ZONE_SIZE);
+}
+
+
 //
 // Local Variables:
 // mode: C++
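
The limit recorded by record_stack_limit sits LIMIT_OFFSET + RED_ZONE_SIZE bytes above the segment base, so a prologue that compares %esp directly against it still leaves the 256-byte frame allowance plus the red zone untouched. A small self-contained check of that arithmetic, using hypothetical segment addresses and the RZ_LINUX_* red zone value:

    #include <cassert>
    #include <cstdint>

    int main() {
        const uintptr_t LIMIT_OFFSET = 256;       // frame allowance from the comment
        const uintptr_t RED_ZONE_SIZE = 1024 * 2; // the RZ_LINUX_* value
        const uintptr_t data = 0x100000;          // hypothetical &stk->data[0]
        const uintptr_t end = data + 32 * 1024;   // hypothetical stk->end

        // The A(...) assertion above, in plain form:
        assert(end - RED_ZONE_SIZE - data >= LIMIT_OFFSET);

        // What record_sp receives:
        const uintptr_t limit = data + LIMIT_OFFSET + RED_ZONE_SIZE;

        // Any %esp at or above `limit` has at least LIMIT_OFFSET bytes of
        // ordinary stack plus the whole red zone left before the segment base.
        assert(limit - data == LIMIT_OFFSET + RED_ZONE_SIZE);
        return 0;
    }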