dynasmrt/cache_control.rs

//! This module contains several utility functions to manage the state of the caches
//! of the executing processor. On architectures that keep the instruction and data caches
//! coherent in hardware (like x86/AMD64), these are no-ops, as the processor handles the
//! required synchronization internally. On modified Harvard architectures like ARMv8, these
//! functions are needed to ensure that the data cache and instruction cache stay synchronized.
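//!
//! A minimal usage sketch, assuming `buf` is an executable mapping that has just been filled
//! with assembled code (the `emit_code` helper is hypothetical):
//!
//! ```ignore
//! emit_code(&mut buf);           // write instructions through the data cache
//! synchronize_icache(&buf);      // make those writes visible to instruction fetches
//! prepare_for_execution(&buf);   // discard stale state on the thread that will run the code
//! // it is now safe to jump into `buf` on this thread
//! ```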

/// This function should be called before any jit-compiled code is executed, on the thread that will
/// execute this code.
#[inline(always)]
pub fn prepare_for_execution(slice: &[u8]) {
    #![allow(unused_variables)]
    #[cfg(target_arch="aarch64")]
    {
        aarch64::prepare_for_execution()
    }
    #[cfg(any(target_arch="riscv64", target_arch="riscv32"))]
    {
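        // on RISC-V the visibility guarantee has to be established on the thread that will run
        // the code, so we do the flush here with the "local" flag set (see the riscv module below).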
        riscv::enforce_ordering_dcache_icache(slice, true);
    }
}

/// This function should be called after modification of any data that could previously have been
/// loaded into the instruction cache. It ensures that these modifications are propagated to
/// the instruction caches.
#[inline(always)]
#[allow(unused_variables)]
pub fn synchronize_icache(slice: &[u8]) {
    #[cfg(all(target_arch="aarch64", not(target_os="macos")))]
    {
        aarch64::synchronize_icache(slice);
    }
    #[cfg(all(target_arch="aarch64", target_os="macos"))]
    {
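        // On macOS we defer to the system-provided routine: `sys_icache_invalidate` comes from
        // libSystem and is declared in <libkern/OSCacheControl.h>.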
        extern "C" {
            pub fn sys_icache_invalidate(
                start: *const std::ffi::c_void,
                size: usize,
            );
        }
        unsafe {
            sys_icache_invalidate(slice.as_ptr() as *const std::ffi::c_void, slice.len());
        }
    }
}

#[cfg(target_arch="aarch64")]
mod aarch64 {
    use std::arch::asm;

    /// Returns the cache line sizes as reported by the processor, as a tuple of (dcache, icache).
    fn get_cacheline_sizes() -> (usize, usize) {
        let ctr_el0: usize;

        // safety: we're just reading a system register (ctr_el0) that can always be read
        unsafe {
            asm!(
                "mrs {outreg}, ctr_el0",
                outreg = lateout(reg) ctr_el0,
                options(nomem, nostack, preserves_flags)
            );
        }

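        // CTR_EL0 encodes the line sizes as log2 of the number of 4-byte words:
        // DminLine in bits [19:16] for the data cache, IminLine in bits [3:0] for the
        // instruction cache, so `4 << field` yields the line size in bytes.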
        (
            4 << ((ctr_el0 >> 16) & 0xF),
            4 << (ctr_el0 & 0xF)
        )
    }

    /// Waits for any previous cache operations to complete. According to the AArch64 manuals,
    /// `dsb ish` also waits for any previous cache maintenance operations to complete before
    /// allowing execution to continue.
    #[inline(always)]
    fn wait_for_cache_ops_complete() {
        // safety: this is purely a memory barrier.
        unsafe {
            asm!(
                "dsb ish",
                options(nostack, preserves_flags)
            );
        }
    }

    /// Inform the processor that the dcache line containing `addr` should be synchronized back to
    /// the unified memory layer.
    #[inline(always)]
    fn flush_dcache_line(addr: usize) {
        // safety: flushing caches is always safe
        unsafe {
            asm!(
                "dc cvau, {address}",
                address = in(reg) addr,
                options(nostack, preserves_flags)
            );
        }
    }

    /// Inform the processor that the icache line containing `addr` is invalid, and that it should be
    /// re-fetched from unified memory.
    #[inline(always)]
    fn invalidate_icache_line(addr: usize) {
        // safety: invalidating caches is always safe
        unsafe {
            asm!(
                "ic ivau, {address}",
                address = in(reg) addr,
                options(nostack, preserves_flags)
            );
        }
    }

    /// Inform the current core that its pipeline might contain stale instructions that should
    /// be re-fetched from the instruction cache.
    #[inline(always)]
    fn invalidate_pipeline() {
        // safety: this is just a barrier.
        unsafe {
            asm!(
                "isb",
                options(nostack, preserves_flags)
            );
        }
    }

    /// On AArch64, after the data has been synchronized from the dcache to the icache,
    /// it is necessary to flush the pipeline of the core that will execute the modified code,
    /// as stale instructions may already have been fetched into it.
    #[inline(always)]
    pub fn prepare_for_execution() {
        invalidate_pipeline();
    }

    /// On AArch64, we first need to flush data from the dcache to unified memory, and then
    /// inform the icache(s) that their current data might be invalid. This is a no-op if
    /// the slice is zero-length.
    pub fn synchronize_icache(slice: &[u8]) {
        if slice.is_empty() {
            return;
        }

        let start_addr = slice.as_ptr() as usize;
        let end_addr = start_addr + slice.len();

        // query the cache line sizes
        let (dcache_line_size, icache_line_size) = get_cacheline_sizes();

        // dcache cleaning loop
        let mut addr = start_addr & !(dcache_line_size - 1);
        while addr < end_addr {
            flush_dcache_line(addr);
            addr += dcache_line_size;
        }

        // need to wait for dcache cleaning to complete before invalidating the icache
        wait_for_cache_ops_complete();

        // icache invalidation loop
        addr = start_addr & !(icache_line_size - 1);
        while addr < end_addr {
            invalidate_icache_line(addr);
            addr += icache_line_size;
        }

        // wait for that to complete as well
        wait_for_cache_ops_complete();
    }
}

#[cfg(any(target_arch="riscv64", target_arch="riscv32"))]
mod riscv {
    // On risc-v, the story about how we synchronize caches is a confusing one.
    // The ISA specification states that we ought to do the following:
    // 1: on the assembling hart, perform a data fence to ensure that
    //    any stores will be visible to other harts
    // 2: on the executing hart, perform a fence.i instruction fence to
    //    ensure that all the observed stores are visible to our instruction
    //    fetches
    //
    // However, this doesn't solve all problems. Namely, the OS might just move our process
    // from the current hart to another hart right after the fence.i instruction, so on its own
    // it offers no real guarantee. For this reason, Linux has removed fence.i from the user ABI
    // and instead offers a syscall for managing this: `riscv_flush_icache()`, which has options
    // to apply to a single thread or to all threads, over a range of addresses.
    // As there are no other operating systems targeting risc-v right now, this is the only
    // choice we have.
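    // The `flags` argument of that syscall: bit 0 (SYS_RISCV_FLUSH_ICACHE_LOCAL) restricts the
    // flush to the calling thread's context; with flags == 0 it applies to all threads of the
    // process.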
    use std::ffi::{c_void, c_long, c_int};

    #[cfg(unix)]
    extern "C" {
        #[link_name="__riscv_flush_icache"]
        fn riscv_flush_icache(start: *const c_void, end: *const c_void, flags: c_long) -> c_int;
    }

    /// Ensure that any previous writes to `slice` become visible to instruction fetches. If
    /// `local` is true, only the calling thread is affected; otherwise all threads of the
    /// process are.
    pub fn enforce_ordering_dcache_icache(slice: &[u8], local: bool) {
        let range = slice.as_ptr_range();
        let start = range.start as *const c_void;
        let end = range.end as *const c_void;
        let mut flags: c_long = 0;
        if local {
            flags |= 1;
        }
        let rv = unsafe {
            riscv_flush_icache(start, end, flags)
        };
        assert!(rv == 0, "riscv_flush_icache failed, returned {rv}");
    }
}