dynasmrt/
cache_control.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
//! This module contains several utility functions to manage the state of the caches
//! of the executing processor. On von Neumann architectures (like x86/AMD64), these are no-ops,
//! as these processors ensure synchronization of the instruction and data caches internally.
//! On modified Harvard architectures like ARMv8, these functions are needed to ensure that
//! the data cache and instruction cache stay synchronized.

/// Makes freshly assembled code safe to run on the calling thread.
///
/// Call this on the thread that is about to execute jit-compiled code, before
/// jumping into it. `slice` is the memory region that holds that code.
///
/// * On AArch64 this forwards to `aarch64::prepare_for_execution`; `slice` is not used.
/// * On RISC-V (32- and 64-bit) this forwards to
///   `riscv::enforce_ordering_dcache_icache(slice, true)`.
/// * On every other architecture the body is empty.
#[inline(always)]
// `slice` is only read on RISC-V targets, so it is unused everywhere else.
#[allow(unused_variables)]
pub fn prepare_for_execution(slice: &[u8]) {
    #[cfg(target_arch = "aarch64")]
    aarch64::prepare_for_execution();

    #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
    riscv::enforce_ordering_dcache_icache(slice, true);
}

/// Publishes modified code bytes to the instruction caches.
///
/// Call this after writing to any memory in `slice` that may already have been
/// fetched into an instruction cache, so that later instruction fetches observe
/// the new contents.
///
/// Only AArch64 has work to do here: the call is forwarded to
/// `aarch64::synchronize_icache`. On every other architecture the body is empty.
#[allow(unused_variables)] // `slice` is only read on AArch64
#[inline(always)]
pub fn synchronize_icache(slice: &[u8]) {
    #[cfg(target_arch = "aarch64")]
    aarch64::synchronize_icache(slice);
}

#[cfg(target_arch = "aarch64")]
mod aarch64 {
    use std::arch::asm;

    /// Reads the cache type register `ctr_el0` and decodes the cache line sizes
    /// in bytes, returned as `(dcache, icache)`.
    fn get_cacheline_sizes() -> (usize, usize) {
        let cache_type: usize;

        // SAFETY: this only reads the `ctr_el0` system register into a general
        // purpose register; no memory, stack or flags are touched.
        unsafe {
            asm!(
                "mrs {value}, ctr_el0",
                value = lateout(reg) cache_type,
                options(nomem, nostack, preserves_flags)
            );
        }

        // Each 4-bit field is a shift amount applied to 4 bytes: bits [19:16]
        // describe the data cache, bits [3:0] the instruction cache.
        let dcache_line = 4usize << ((cache_type >> 16) & 0xF);
        let icache_line = 4usize << (cache_type & 0xF);
        (dcache_line, icache_line)
    }

    /// Blocks until earlier cache maintenance operations have finished.
    ///
    /// According to the AArch64 manuals, `dsb ish` is not only a memory barrier: it
    /// also waits for previously issued cache maintenance operations to complete
    /// before execution continues.
    #[inline(always)]
    fn wait_for_cache_ops_complete() {
        // SAFETY: this is purely a memory barrier.
        unsafe {
            asm!("dsb ish", options(nostack, preserves_flags));
        }
    }

    /// Asks the processor to write the dcache line containing `addr` back to the
    /// unified memory layer.
    #[inline(always)]
    fn flush_dcache_line(addr: usize) {
        // SAFETY: flushing caches is always safe.
        unsafe {
            asm!(
                "dc cvau, {line}",
                line = in(reg) addr,
                options(nostack, preserves_flags)
            );
        }
    }

    /// Marks the icache line containing `addr` as invalid, so that it is re-fetched
    /// from unified memory.
    #[inline(always)]
    fn invalidate_icache_line(addr: usize) {
        // SAFETY: invalidating caches is always safe.
        unsafe {
            asm!(
                "ic ivau, {line}",
                line = in(reg) addr,
                options(nostack, preserves_flags)
            );
        }
    }

    /// Tells the current core that its pipeline may hold stale instructions which
    /// must be re-fetched from the instruction cache.
    #[inline(always)]
    fn invalidate_pipeline() {
        // SAFETY: this is just a barrier.
        unsafe {
            asm!("isb", options(nostack, preserves_flags));
        }
    }

    /// Calls `op` with the base address of every `line_size`-byte line that overlaps
    /// the address range `start..end`, in ascending order.
    ///
    /// `line_size` must be a power of two, because the start address is rounded
    /// down by masking with `line_size - 1`.
    #[inline(always)]
    fn for_each_cache_line(start: usize, end: usize, line_size: usize, op: impl Fn(usize)) {
        let mut line = start & !(line_size - 1);
        while line < end {
            op(line);
            line += line_size;
        }
    }

    /// Flushes the pipeline of the calling core.
    ///
    /// After modified code has been synchronized from the dcache to the icache, a
    /// core may still have some of the old instructions in its pipeline, so every
    /// core that is going to execute the modified code has to do this.
    #[inline(always)]
    pub fn prepare_for_execution() {
        invalidate_pipeline();
    }

    /// Makes the bytes of `slice` visible to instruction fetches.
    ///
    /// The data is first cleaned from the dcache to unified memory, after which the
    /// icache(s) are told that their copies of those lines may be invalid. Does
    /// nothing for an empty slice.
    pub fn synchronize_icache(slice: &[u8]) {
        if slice.is_empty() {
            return;
        }

        let first_byte = slice.as_ptr() as usize;
        let past_the_end = first_byte + slice.len();
        let (dcache_line, icache_line) = get_cacheline_sizes();

        // Clean every data cache line that overlaps the slice.
        for_each_cache_line(first_byte, past_the_end, dcache_line, flush_dcache_line);

        // The cleaning has to be complete before the icache is invalidated.
        wait_for_cache_ops_complete();

        // Invalidate every instruction cache line that overlaps the slice.
        for_each_cache_line(first_byte, past_the_end, icache_line, invalidate_icache_line);

        // And wait for the invalidation to complete as well.
        wait_for_cache_ops_complete();
    }
}

#[cfg(any(target_arch = "riscv64", target_arch = "riscv32"))]
mod riscv {
    // Cache synchronization on RISC-V is a confusing story.
    //
    // According to the data sheet, the procedure ought to be:
    // 1: the assembling hart performs a data fence, so that its stores become
    //    visible to other harts
    // 2: the executing hart performs a `fence.i` instruction fence, so that all
    //    observed stores become visible to its instruction fetches
    //
    // This does not solve every problem though: the OS may move our process to a
    // different hart right after the `fence.i`, so it basically offers no
    // guarantees. For this reason Linux has removed FENCE.I from the user ABI and
    // offers a syscall for managing this instead: `riscv_flush_icache()`, which can
    // be applied to a single thread or to all threads, and over a range of
    // addresses.
    // As there are no other operating systems targeting RISC-V right now, this is
    // the only choice we have.
    use std::ffi::{c_int, c_long, c_void};

    #[cfg(unix)]
    extern "C" {
        #[link_name = "__riscv_flush_icache"]
        fn riscv_flush_icache(start: *const c_void, end: *const c_void, flags: c_long) -> c_int;
    }

    /// Flag bit passed when only the calling thread has to be covered
    /// (presumably Linux's `SYS_RISCV_FLUSH_ICACHE_LOCAL`; confirm against the kernel headers).
    const FLUSH_LOCAL: c_long = 1;

    /// Calls `riscv_flush_icache` for the memory covered by `slice`.
    ///
    /// When `local` is `true` the `FLUSH_LOCAL` flag is passed; otherwise no
    /// flags are set.
    ///
    /// # Panics
    ///
    /// Panics if `riscv_flush_icache` returns a non-zero value.
    pub fn enforce_ordering_dcache_icache(slice: &[u8], local: bool) {
        let flags: c_long = if local { FLUSH_LOCAL } else { 0 };
        let bounds = slice.as_ptr_range();

        // SAFETY: both pointers are derived from a live slice and are only passed
        // along as the bounds of the address range to flush.
        let rv = unsafe {
            riscv_flush_icache(
                bounds.start as *const c_void,
                bounds.end as *const c_void,
                flags,
            )
        };

        assert!(rv == 0, "riscv_flush_icache failed, returned {rv}");
    }
}