// wasmer_vm/probestack/
// compiler_builtins.rs

// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This module defines the `__rust_probestack` intrinsic which is used in the
//! implementation of "stack probes" on certain platforms.
//!
//! The purpose of a stack probe is to provide a static guarantee that if a
//! thread has a guard page then a stack overflow is guaranteed to hit that
//! guard page. If a function did not have a stack probe then there's a risk of
//! having a stack frame *larger* than the guard page, so a function call could
//! skip over the guard page entirely and then later hit maybe the heap or
//! another thread, possibly leading to security vulnerabilities such as [The
//! Stack Clash], for example.
//!
//! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash
//!
//! The `__rust_probestack` is called in the prologue of functions whose stack
//! size is larger than the guard page, for example larger than 4096 bytes on
//! x86. This function is then responsible for "touching" all pages relevant to
//! the stack to ensure that if any of them are the guard page we're guaranteed
//! to hit it.
//!
//! The precise ABI for how this function operates is defined by LLVM. There's
//! no real documentation as to what this is, so you'd basically need to read
//! the LLVM source code for reference. Often though the test cases can be
//! illuminating as to the ABI that's generated, or just looking at the output
//! of `llc`.
//!
//! Note that `#[naked]` is typically used here for the stack probe because the
//! ABI corresponds to no actual ABI.
//!
//! Finally it's worth noting that at the time of this writing LLVM only has
//! support for stack probes on x86 and x86_64. There's no support for stack
//! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
//! be more than welcome to accept such a change!

// Windows and Cygwin already have builtins that do this, so the whole module
// is compiled out on those targets.
#![cfg(not(any(windows, target_os = "cygwin")))]
// We only define stack probing for these architectures today.
#![cfg(any(target_arch = "x86_64", target_arch = "x86"))]

// Rust-visible declaration of the probe routine. The body is not written in
// Rust: it is emitted as a global symbol by the `global_asm!` blocks in this
// module.
//
// SAFETY: defined in this module.
// FIXME(extern_custom): the ABI is not correct. The assembly reads the frame
// size from `%rax`/`%eax`, which an `extern "C"` signature cannot express, so
// this declaration only names the symbol.
unsafe extern "C" {
    pub fn __rust_probestack();
}

// A wrapper for our implementation of __rust_probestack, which allows us to
// keep the assembly inline while controlling all CFI directives in the assembly
// emitted for the function.
//
// The macro expands (via `concat!`) to one string literal made of three parts:
//   1. a prologue that switches to the `.text.__rust_probestack` section and
//      declares `__rust_probestack` as a global, hidden, function-typed symbol
//      followed by its label,
//   2. `$body`, the instructions of the routine,
//   3. an epilogue that records the symbol size and pops the section.
//
// This is the ELF version.
#[cfg(not(any(target_vendor = "apple", target_os = "uefi")))]
macro_rules! define_rust_probestack {
    ($body:expr) => {
        concat!(
            "
            .pushsection .text.__rust_probestack
            .globl __rust_probestack
            .type  __rust_probestack, @function
            .hidden __rust_probestack
        __rust_probestack:
            ",
            $body,
            "
            .size __rust_probestack, . - __rust_probestack
            .popsection
            "
        )
    };
}

// UEFI x86_64 version of the wrapper: expands to a string that declares
// `__rust_probestack` as a global symbol, places its label, and then appends
// `$body` unchanged. No section, type, or size directives are emitted here.
#[cfg(all(target_os = "uefi", target_arch = "x86_64"))]
macro_rules! define_rust_probestack {
    ($body:expr) => {
        concat!(
            "
            .globl __rust_probestack
        __rust_probestack:
            ",
            $body
        )
    };
}

// Same as above, but for Mach-O. Note that the triple underscore
// is deliberate.
//
// Expands to a string that declares `___rust_probestack` as a global symbol,
// places its label, and then appends `$body` unchanged.
#[cfg(target_vendor = "apple")]
macro_rules! define_rust_probestack {
    ($body:expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

// In UEFI x86 arch, triple underscore is deliberate.
//
// Expands to a string that declares `___rust_probestack` as a global symbol,
// places its label, and then appends `$body` unchanged.
#[cfg(all(target_os = "uefi", target_arch = "x86"))]
macro_rules! define_rust_probestack {
    ($body:expr) => {
        concat!(
            "
            .globl ___rust_probestack
        ___rust_probestack:
            ",
            $body
        )
    };
}

122// Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax,
123// ensuring that if any pages are unmapped we'll make a page fault.
124//
125// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
126// it does not actually match `extern "C"`.
127//
128// The ABI here is that the stack frame size is located in `%rax`. Upon
129// return we're not supposed to modify `%rsp` or `%rax`.
130//
131// Any changes to this function should be replicated to the SGX version below.
132#[cfg(all(
133    target_arch = "x86_64",
134    not(all(target_env = "sgx", target_vendor = "fortanix"))
135))]
136core::arch::global_asm!(
137    define_rust_probestack!(
138        "
139    .cfi_startproc
140    pushq  %rbp
141    .cfi_adjust_cfa_offset 8
142    .cfi_offset %rbp, -16
143    movq   %rsp, %rbp
144    .cfi_def_cfa_register %rbp
145
146    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11
147
148    // Main loop, taken in one page increments. We're decrementing rsp by
149    // a page each time until there's less than a page remaining. We're
150    // guaranteed that this function isn't called unless there's more than a
151    // page needed.
152    //
153    // Note that we're also testing against `8(%rsp)` to account for the 8
154    // bytes pushed on the stack orginally with our return address. Using
155    // `8(%rsp)` simulates us testing the stack pointer in the caller's
156    // context.
157
158    // It's usually called when %rax >= 0x1000, but that's not always true.
159    // Dynamic stack allocation, which is needed to implement unsized
160    // rvalues, triggers stackprobe even if %rax < 0x1000.
161    // Thus we have to check %r11 first to avoid segfault.
162    cmp    $0x1000,%r11
163    jna    3f
1642:
165    sub    $0x1000,%rsp
166    test   %rsp,8(%rsp)
167    sub    $0x1000,%r11
168    cmp    $0x1000,%r11
169    ja     2b
170
1713:
172    // Finish up the last remaining stack space requested, getting the last
173    // bits out of r11
174    sub    %r11,%rsp
175    test   %rsp,8(%rsp)
176
177    // Restore the stack pointer to what it previously was when entering
178    // this function. The caller will readjust the stack pointer after we
179    // return.
180    add    %rax,%rsp
181
182    leave
183    .cfi_def_cfa_register %rsp
184    .cfi_adjust_cfa_offset -8
185    ret
186    .cfi_endproc
187    "
188    ),
189    options(att_syntax)
190);
191
// This function is the same as above, except that some instructions are
// [manually patched for LVI].
//
// Concretely, the only difference in the instruction stream is the return
// sequence: instead of `ret`, the return address is popped into `%r11`, an
// `lfence` is issued, and control returns with an indirect `jmp *%r11`.
//
// [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
#[cfg(all(
    target_arch = "x86_64",
    all(target_env = "sgx", target_vendor = "fortanix")
))]
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    pushq  %rbp
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbp, -16
    movq   %rsp, %rbp
    .cfi_def_cfa_register %rbp

    mov    %rax,%r11        // duplicate %rax as we're clobbering %r11

    // Main loop, taken in one page increments. We're decrementing rsp by
    // a page each time until there's less than a page remaining. We're
    // guaranteed that this function isn't called unless there's more than a
    // page needed.
    //
    // Note that we're also testing against `8(%rsp)` to account for the 8
    // bytes pushed on the stack originally with our return address. Using
    // `8(%rsp)` simulates us testing the stack pointer in the caller's
    // context.

    // It's usually called when %rax >= 0x1000, but that's not always true.
    // Dynamic stack allocation, which is needed to implement unsized
    // rvalues, triggers stackprobe even if %rax < 0x1000.
    // Thus we have to check %r11 first to avoid segfault.
    cmp    $0x1000,%r11
    jna    3f
2:
    sub    $0x1000,%rsp
    test   %rsp,8(%rsp)
    sub    $0x1000,%r11
    cmp    $0x1000,%r11
    ja     2b

3:
    // Finish up the last remaining stack space requested, getting the last
    // bits out of r11
    sub    %r11,%rsp
    test   %rsp,8(%rsp)

    // Restore the stack pointer to what it previously was when entering
    // this function. The caller will readjust the stack pointer after we
    // return.
    add    %rax,%rsp

    leave
    .cfi_def_cfa_register %rsp
    .cfi_adjust_cfa_offset -8
    pop %r11
    lfence
    jmp *%r11
    .cfi_endproc
    "
    ),
    options(att_syntax)
);

#[cfg(all(target_arch = "x86", not(target_os = "uefi")))]
// This is the same as x86_64 above, only translated for 32-bit sizes. Note
// that on Unix we're expected to restore everything as it was, this
// function basically can't tamper with anything.
//
// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
// it does not actually match `extern "C"`.
//
// The ABI here is the same as x86_64, except everything is 32-bits large.
//
// `%ecx` is used as the remaining-bytes counter, so it is saved with a `push`
// on entry and restored with a `pop` before `leave`. Local label `2` is the
// page-sized loop and local label `3` handles the final partial page.
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)

    add    %eax,%esp
    pop    %ecx
    leave
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
    ),
    options(att_syntax)
);

#[cfg(all(target_arch = "x86", target_os = "uefi"))]
// UEFI target is windows like target. LLVM will do _chkstk things like windows.
// probestack function will also do things like _chkstk in MSVC.
// So we need to subtract %eax from %esp in probestack when arch is x86.
//
// FIXME(abi_custom): This function is unsafe because it uses a custom ABI,
// it does not actually match `extern "C"`.
//
// Differences from the non-UEFI x86 routine, as visible in the assembly:
//   * `%edx` is saved/restored in addition to `%ecx`, because it is used as a
//     scratch register.
//   * After the last probe, the return address (`4(%ebp)`) is copied to
//     `12(%esp)`, i.e. the slot that sits `%eax` bytes below its original
//     location.
//   * After `leave`, `%esp` is lowered by `%eax`, so `ret` pops the relocated
//     return address and the caller resumes with its frame already allocated.
//
// REF: Rust commit(74e80468347)
// rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805
// Comments in LLVM:
//   MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
//   MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
//   themselves.
core::arch::global_asm!(
    define_rust_probestack!(
        "
    .cfi_startproc
    push   %ebp
    .cfi_adjust_cfa_offset 4
    .cfi_offset %ebp, -8
    mov    %esp, %ebp
    .cfi_def_cfa_register %ebp
    push   %ecx
    push   %edx
    mov    %eax,%ecx

    cmp    $0x1000,%ecx
    jna    3f
2:
    sub    $0x1000,%esp
    test   %esp,8(%esp)
    sub    $0x1000,%ecx
    cmp    $0x1000,%ecx
    ja     2b

3:
    sub    %ecx,%esp
    test   %esp,8(%esp)
    mov    4(%ebp),%edx
    mov    %edx, 12(%esp)
    add    %eax,%esp
    pop    %edx
    pop    %ecx
    leave

    sub   %eax, %esp
    .cfi_def_cfa_register %esp
    .cfi_adjust_cfa_offset -4
    ret
    .cfi_endproc
    "
    ),
    options(att_syntax)
);