dynasmrt/cache_control.rs
1//! This module contains several utility functions to manage the state of the caches
2//! of the executing processor. On von Neumann architectures (like x86/AMD64), these are no-ops,
3//! as these processors ensure synchronization of the instruction and data caches internally.
4//! On modified Harvard architectures like ARMv8, these functions are needed to ensure that
5//! the data cache and instruction cache stay synchronized.
6
/// Makes the calling thread ready to run jit-compiled code.
///
/// Must be invoked on the thread that is going to execute the code in `slice`, before that
/// code is entered. AArch64 targets dispatch to `aarch64::prepare_for_execution`; RISC-V targets
/// dispatch to `riscv::enforce_ordering_dcache_icache` with the `local` flag set. On every other
/// architecture the body is empty.
#[inline(always)]
// `slice` is only read on RISC-V, so it is unused on every other target.
#[allow(unused_variables)]
pub fn prepare_for_execution(slice: &[u8]) {
    #[cfg(target_arch = "aarch64")]
    aarch64::prepare_for_execution();

    #[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))]
    riscv::enforce_ordering_dcache_icache(slice, true);
}
21
/// Propagates modifications of executable memory into the instruction caches.
///
/// This function should be called after modification of any data that could have been loaded
/// into the instruction cache previously (for example after assembling or patching the code
/// held in `slice`).
///
/// * On AArch64 (except macOS) this dispatches to `aarch64::synchronize_icache`.
/// * On AArch64 macOS the work is delegated to the system's `sys_icache_invalidate`.
/// * On RISC-V this dispatches to `riscv::enforce_ordering_dcache_icache` with the `local`
///   flag cleared, so the flush is not restricted to the calling thread.
/// * On all other architectures this is a no-op.
#[inline(always)]
// `slice` is unused on architectures where this function is a no-op.
#[allow(unused_variables)]
pub fn synchronize_icache(slice: &[u8]) {
    #[cfg(all(target_arch = "aarch64", not(target_os = "macos")))]
    {
        aarch64::synchronize_icache(slice);
    }
    #[cfg(all(target_arch = "aarch64", target_os = "macos"))]
    {
        extern "C" {
            pub fn sys_icache_invalidate(
                start: *const std::ffi::c_void,
                size: usize,
            );
        }
        // SAFETY: the pointer/length pair describes `slice`, which is borrowed for the duration
        // of the call. The routine is assumed to perform cache maintenance only.
        unsafe {
            sys_icache_invalidate(slice.as_ptr() as *const std::ffi::c_void, slice.len());
        }
    }
    // Without this branch RISC-V targets would silently skip synchronization on the modifying
    // side, leaving the documented guarantee of this function unfulfilled.
    #[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))]
    {
        riscv::enforce_ordering_dcache_icache(slice, false);
    }
}
45
#[cfg(target_arch = "aarch64")]
mod aarch64 {
    use std::arch::asm;

    /// Reads the `ctr_el0` system register and decodes the cache line sizes it reports.
    ///
    /// Returns `(dcache, icache)`: the data cache value comes from bits 16..=19 and the
    /// instruction cache value from bits 0..=3, each used as a left shift applied to 4.
    fn get_cacheline_sizes() -> (usize, usize) {
        let cache_type: usize;

        // SAFETY: this only reads a system register (ctr_el0) that can always be read.
        unsafe {
            asm!(
                "mrs {ctr}, ctr_el0",
                ctr = lateout(reg) cache_type,
                options(nomem, nostack, preserves_flags)
            );
        }

        let dcache_line: usize = 4 << ((cache_type >> 16) & 0xF);
        let icache_line: usize = 4 << (cache_type & 0xF);
        (dcache_line, icache_line)
    }

    /// Blocks until previously issued cache operations have finished. According to the Aarch64
    /// manuals, `dsb ish` not only acts as a barrier but also waits for any earlier cache
    /// maintenance operations to complete before execution is allowed to continue.
    #[inline(always)]
    fn wait_for_cache_ops_complete() {
        // SAFETY: this is purely a memory barrier.
        unsafe {
            asm!("dsb ish", options(nostack, preserves_flags));
        }
    }

    /// Tells the processor that the dcache line containing `addr` has to be synchronized back
    /// to the unified memory layer.
    #[inline(always)]
    fn flush_dcache_line(addr: usize) {
        // SAFETY: flushing caches is always safe.
        unsafe {
            asm!(
                "dc cvau, {line}",
                line = in(reg) addr,
                options(nostack, preserves_flags)
            );
        }
    }

    /// Tells the processor that the icache line containing `addr` is no longer valid and has to
    /// be fetched again from unified memory.
    #[inline(always)]
    fn invalidate_icache_line(addr: usize) {
        // SAFETY: invalidating caches is always safe.
        unsafe {
            asm!(
                "ic ivau, {line}",
                line = in(reg) addr,
                options(nostack, preserves_flags)
            );
        }
    }

    /// Tells the current core that its pipeline may hold stale data which must be fetched again
    /// from the instruction cache.
    #[inline(always)]
    fn invalidate_pipeline() {
        // SAFETY: this is just a barrier.
        unsafe {
            asm!("isb", options(nostack, preserves_flags));
        }
    }

    /// On Aarch64, once data has been synchronized from the dcache to the icache, the pipelines
    /// of the cores that will execute the modified data still have to be flushed, since part of
    /// it may already have been loaded into the pipeline.
    #[inline(always)]
    pub fn prepare_for_execution() {
        invalidate_pipeline();
    }

    /// On Aarch64, the data covering `slice` is first flushed from the dcache to unified memory,
    /// after which the icache(s) are told that their copies of those lines may be invalid.
    /// Nothing happens for a zero-length slice.
    pub fn synchronize_icache(slice: &[u8]) {
        if slice.is_empty() {
            return;
        }

        let first_byte = slice.as_ptr() as usize;
        let past_end = first_byte + slice.len();

        let (dcache_line, icache_line) = get_cacheline_sizes();

        // clean every dcache line overlapping the slice, starting at the line-aligned address
        // at or below its first byte
        let first_dcache_line = first_byte & !(dcache_line - 1);
        for line in (first_dcache_line..past_end).step_by(dcache_line) {
            flush_dcache_line(line);
        }

        // the cleaning has to be finished before the icache is invalidated
        wait_for_cache_ops_complete();

        // invalidate every icache line overlapping the slice
        let first_icache_line = first_byte & !(icache_line - 1);
        for line in (first_icache_line..past_end).step_by(icache_line) {
            invalidate_icache_line(line);
        }

        // and wait for the invalidation to finish as well
        wait_for_cache_ops_complete();
    }
}
167
#[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))]
mod riscv {
    // Cache synchronization on risc-v is a muddled affair.
    // According to the data sheet, the required steps are:
    // 1: the assembling hart issues a data fence, so that its stores become visible to
    //    other harts
    // 2: the executing hart issues a fence.i instruction fence, so that all observed stores
    //    become visible to its instruction fetches
    //
    // That does not cover everything though: the OS is free to migrate our process to a
    // different hart right after the fence.i instruction, so in practice it guarantees nothing.
    // Because of this, linux removed FENCE.I from the user ABI and offers a syscall instead:
    // `riscv_flush_icache()`, which can act on a single thread or on all threads, over a range
    // of addresses.
    // As there are no other operating systems targeting risc-v right now, this is the only
    // option available to us.
    use std::ffi::{c_int, c_long, c_void};

    #[cfg(unix)]
    extern "C" {
        #[link_name = "__riscv_flush_icache"]
        fn riscv_flush_icache(start: *const c_void, end: *const c_void, flags: c_long) -> c_int;
    }

    /// Flushes the instruction cache for the address range covered by `slice` through
    /// `riscv_flush_icache`. When `local` is set, bit 0 of the flags argument is set;
    /// otherwise the flags are zero.
    ///
    /// # Panics
    ///
    /// Panics when `riscv_flush_icache` returns a non-zero value.
    pub fn enforce_ordering_dcache_icache(slice: &[u8], local: bool) {
        let bounds = slice.as_ptr_range();
        let flags: c_long = if local { 1 } else { 0 };

        // SAFETY: both pointers are derived from `slice`, which stays borrowed for the whole
        // call; the callee is assumed to perform cache maintenance only.
        let rv = unsafe {
            riscv_flush_icache(
                bounds.start.cast::<c_void>(),
                bounds.end.cast::<c_void>(),
                flags,
            )
        };
        assert!(rv == 0, "riscv_flush_icache failed, returned {rv}");
    }
}
208}