use dynasmrt::x64::Assembler;
use dynasmrt::{self, DynasmApi, DynasmLabelApi, AssemblyOffset, DynamicLabel};

use std::{i32, i64, u8};
use std::collections::HashMap;
use std::cmp::{min, max};

use super::cached_map::{CacheEntry, CACHE_MASK};

use ::program::{Program, Command, Integer};
use super::{Options};

use super::jit_state::JitState;

/// Returns a buffer containing the output of compiling the specified program. This is
/// mainly useful for debugging and for optimizing the performance of the JIT compiler.
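///
/// A minimal usage sketch (assuming a `Program` and `Options` are already available
/// from the rest of the crate; the output file name is arbitrary):
///
/// ```ignore
/// let buf = debug_compile(&program, options);
/// std::fs::write("jit_dump.bin", &buf).unwrap();
/// // inspect with e.g. `ndisasm -b 64 jit_dump.bin`
/// ```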
pub fn debug_compile(program: &Program, options: Options) -> Vec<u8> {
    let mut compiler = JitCompiler::new(&program, options);

    // first compile everything (except the starting block as there's no reason to do that)
    // if !program.commands.is_empty() {
    //     compiler.compile_index(0);
    // }

    use program::Command::*;
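    // every Label is a potential jump target and every Call returns to the command
    // right after it, so compile a block starting after each of them.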
    for (i, c) in program.commands.iter().enumerate() {
        let i = match *c {
            Label | Call {..} if i + 1 != program.commands.len() => i + 1,
            _ => continue
        };

        compiler.compile_index(i);
    }
    compiler.commit();

    let executor = compiler.executor();
    let mut retval = Vec::new();
    retval.extend_from_slice(&executor.lock());
    retval
}

// The register allocation used by the generated code. It is defined here because the
// allocator needs it as well.
macro_rules! dynasm {
    ($ops:expr ; $($t:tt)*) => {
        dynasmrt::dynasm!($ops
            ; .arch x64
            ; .alias state, rcx
            ; .alias stack, rdx // initialized after a call to get_stack
            ; .alias retval, rax
            ; .alias temp0, rax // rax is used as a general temp reg
            // r8, r9, r10 and r11 are used as temp regs
            ; $($t)*
        )
    }
}
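
// Frame layout used by the generated code. The argument registers (rcx, rdx, r8, r9)
// and the 0x28-byte frame adjustment (0x20 bytes of shadow space for callees plus 8
// bytes for alignment) match the Windows x64 calling convention; the shadow space our
// own caller provided is reused as spill slots:
//   [rsp + 0x30]  state pointer (saved rcx)
//   [rsp + 0x38]  cached stack pointer, written once get_stack succeeds
//   [rsp + 0x40]  stack_start, filled in by get_stack
//   [rsp + 0x48]  scratch slot, used by EndSubroutine to receive a command index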

use super::allocator::RegAllocator;

// some utility macros
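//
// epilogue! tears down the 0x28-byte frame and returns control to the driver; the
// optional arguments set retval to the command index at which execution should resume
// and record the block's net stack change in JitState.stack_change.
//
// call_extern! calls one of the runtime helpers from the import section: it passes the
// address of the addressed stack slot in rdx, calls through the stored function
// pointer, and reloads state and stack afterwards as the callee may clobber them or
// reallocate the stack.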
macro_rules! epilogue {
    ($ops:expr) => {dynasm!($ops
        ; add rsp, BYTE 0x28
        ; ret
    )};
    ($ops:expr, , $command_index:expr) => {dynasm!($ops
        ; mov retval, DWORD $command_index as _
        ; add rsp, BYTE 0x28
        ; ret
    )};
    ($ops:expr, $stack_effect:expr, $command_index:expr) => {dynasm!($ops
        ; mov retval, DWORD $command_index as _
        ; add QWORD state => JitState.stack_change, DWORD $stack_effect as _
        ; add rsp, BYTE 0x28
        ; ret
    )};
    ($ops:expr, $stack_effect:expr) => {dynasm!($ops
        ; add QWORD state => JitState.stack_change, DWORD $stack_effect as _
        ; add rsp, BYTE 0x28
        ; ret
    )};
}

macro_rules! call_extern {
    ($ops:expr, $addr:ident, $offset:expr) => {dynasm!($ops
        ; lea stack, stack => Integer[$offset]
        ; call QWORD [->$addr]
        ; mov state, [rsp + 0x30]
        ; mov stack, [rsp + 0x38]
    )}
}

#[cfg(target_arch = "x86_64")]
pub struct JitCompiler<'a> {
    options: Options,
    pub commands: &'a [Command],
    blocks: HashMap<usize, JitBlock>,
    fixups: HashMap<usize, Vec<FixUp>>,
    fixup_queue: Vec<(usize, DynamicLabel)>,
    ops: Assembler
}

#[derive(Debug)]
enum FixUp {
    Jump(AssemblyOffset, AssemblyOffset),
    Lea(AssemblyOffset, AssemblyOffset)
}
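
// A FixUp records a placeholder emitted for a jump or call to a block that had not
// been compiled yet: either an epilogue returning the target's command index, or a
// `lea r9, [->buffer_base]`. Once the target block exists, commit() patches the
// placeholder into a direct `jmp`/`lea` to that block's `chained` label.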

#[derive(Debug, Clone, Copy)]
pub struct JitBlock {
    start:   AssemblyOffset,
    chained: DynamicLabel
}

#[cfg(target_arch = "x86_64")]
impl<'a> JitCompiler<'a> {
    pub fn new(program: &'a Program, options: Options) -> JitCompiler<'a> {
        let mut comp = JitCompiler {
            options: options,
            commands: &program.commands,
            blocks: HashMap::new(),
            fixups: HashMap::new(),
            fixup_queue: Vec::new(),
            ops: Assembler::new().unwrap()
        };

        // create the import section
        dynasm!(comp.ops
            ;->buffer_base:
            ;->cache_bypass_get:
            ; .qword JitState::cache_bypass_get as _
            ;->cache_evict:
            ; .qword JitState::cache_evict as _
            ;->print_num:
            ; .qword JitState::print_num as _
            ;->print_char:
            ; .qword JitState::print_char as _
            ;->input_char:
            ; .qword JitState::input_char as _
            ;->call:
            ; .qword JitState::call as _
            ;->ret:
            ; .qword JitState::ret as _
            ;->get_stack:
            ; .qword JitState::get_stack as _
        );

        comp
    }

    /// Compiles an extended basic block starting at command_index
    pub fn compile(&mut self, start_index: usize) -> Result<JitBlock, String> {
        use program::Command::*;

        // stack effect calculation accumulators.
        // while a command is being matched, stack_effect holds the net change in stack depth
        // BEFORE that command; min_stack and max_stack do take the command itself into
        // account in case the block exits there.
        let mut stack_effect: i32 = 0;
        let mut min_stack   : i32 = 0;
        let mut max_stack   : i32 = 0;
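        // Example: a block consisting of `Push, Duplicate, Add` evolves as
        //   Push:      stack_effect 0 -> 1, stack_bot 0, max_stack 1
        //   Duplicate: stack_effect 1 -> 2, stack_bot 0, max_stack 2
        //   Add:       stack_effect 2 -> 1, stack_bot 0
        // so get_stack is asked for 0 pre-existing items (-min_stack) and room to grow
        // by 2 (max_stack). A block starting with a bare `Add` would instead require
        // -min_stack = 2 items to already be on the stack.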

        // function prologue. when called from the driver we start here; when another jit
        // block jumps to us we enter at `chained`.
        let block = JitBlock {
            start: self.ops.offset(),
            chained: self.ops.new_dynamic_label()
        };
        self.blocks.insert(start_index, block);
        let stack_fixes;
        dynasm!(self.ops
            ; sub rsp, BYTE 0x28
            ; mov [rsp + 0x30], state // rcx
            ;=>block.chained

            // ask get_stack for a stack handle. a null return means a stack error would
            // occur in this block, so we fall through to the epilogue below and bail back
            // to the interpreter; otherwise we jump to >badstack and continue with the
            // block body.
            ;; stack_fixes = self.ops.offset()

            // prep args for get stack (rcx is already set to state). min_stack and max_stack are later fixed up
            ; mov rdx, DWORD 0
            ; mov r8, DWORD 0
            ; lea r9, [rsp + 0x40] // this is where stack_start will be stored
            ; call QWORD [->get_stack]
            ; test retval, retval
            ; jnz >badstack
        );
        epilogue!(self.ops, , start_index);

        dynasm!(self.ops
            ;badstack:
            // restore state and put the stack ptr we got in memory
            ; mov stack, retval
            ; mov state, [rsp + 0x30]
            ; mov [rsp + 0x38], stack
            // we're done now. state, stack and stack_start are in memory, state and stack are in rcx and rdx
        );

        // register allocation manager
        let mut allocator = RegAllocator::new();

        let mut commands = self.commands[start_index..].iter();
        let mut command_index = start_index;
        loop {
            if let Some(c) = commands.next() {
                // offset to the topmost item of the stack at the start of a command
                let offset: i32 = stack_effect - 1;

                let (stack_change, stack_extra) = match *c {
                    Push {value} => if value > i32::MAX as Integer || value < i32::MIN as Integer {
                        let mut top = 0;
                        allocator.stage(&mut self.ops).free(&mut top).finish();
                        dynasm!(self.ops
                            ; mov Rq(top), QWORD value as i64
                        );
                        allocator.set_offset(top, offset + 1);
                        (1i32, 1)
                    } else {
                        let value = value as i32;
                        // Optimizations for operations commonly preceded by a Push; these
                        // tend to shave off at least 2 instructions that hit memory.
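                        // For example, `Push {value: 5}` immediately followed by `Add`
                        // compiles (with overflow checks enabled) to roughly
                        //     add rX, 5 / jno >ok / sub rX, 5 / <spill + epilogue> / ok:
                        // where rX caches the current stack top, so the pushed value never
                        // touches the in-memory stack, and an overflow bails back to the
                        // interpreter at the Push so it can re-run the pair and report it.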
                        let c2 = commands.as_slice().get(0);
                        match c2 {
                            Some(&Add) => {
                                let mut left = 0;
                                allocator.stage(&mut self.ops).load(&mut left, offset).finish();
                                if value == 1 {
                                    dynasm!(self.ops; inc Rq(left));
                                } else if value == -1 {
                                    dynasm!(self.ops; dec Rq(left));
                                } else {
                                    dynasm!(self.ops; add Rq(left), value);
                                }
                                if !self.options.contains(Options::IGNORE_OVERFLOW) {
                                    dynasm!(self.ops
                                        ; jno >overflow
                                        ; sub Rq(left), value
                                        ;; allocator.spill_error(&mut self.ops)
                                        ;; epilogue!(self.ops, stack_effect, command_index)
                                        ;overflow:
                                    );
                                }
                                allocator.modify(left);
                                commands.next();
                                command_index += 1;
                                (0, 1)
                            },
                            Some(&Subtract) => {
                                let mut left = 0;
                                allocator.stage(&mut self.ops).load(&mut left, offset).finish();
                                if value == 1 {
                                    dynasm!(self.ops; dec Rq(left));
                                } else if value == -1 {
                                    dynasm!(self.ops; inc Rq(left));
                                } else {
                                    dynasm!(self.ops; sub Rq(left), value);
                                }
                                if !self.options.contains(Options::IGNORE_OVERFLOW) {
                                    dynasm!(self.ops
                                        ; jno >overflow
                                        ; add Rq(left), value
                                        ;; allocator.spill_error(&mut self.ops)
                                        ;; epilogue!(self.ops, stack_effect, command_index)
                                        ;overflow:
                                    );
                                }
                                allocator.modify(left);
                                commands.next();
                                command_index += 1;
                                (0, 1)
                            },
                            Some(&Multiply) => {
                                if !self.options.contains(Options::IGNORE_OVERFLOW) {
                                    let mut left = 0;
                                    let mut res = 0;
                                    allocator.stage(&mut self.ops).load(&mut left, offset).free(&mut res).finish();
                                    dynasm!(self.ops
                                        ; imul Rq(res), Rq(left), value
                                        ; jno >overflow
                                        ;; allocator.spill_error(&mut self.ops)
                                        ;; epilogue!(self.ops, stack_effect, command_index)
                                        ;overflow:
                                    );
                                    allocator.forget(left);
                                    allocator.set_offset(res, offset);
                                } else {
                                    let mut left = 0;
                                    allocator.stage(&mut self.ops).load(&mut left, offset).finish();
                                    dynasm!(self.ops
                                        ; imul Rq(left), Rq(left), value
                                    );
                                    allocator.modify(left);
                                }
                                commands.next();
                                command_index += 1;
                                (0, 1)
                            },
                            _ => {
                                let mut top = 0;
                                allocator.stage(&mut self.ops).free(&mut top).finish();
                                dynasm!(self.ops
                                    ; mov Rq(top), DWORD value
                                );
                                allocator.set_offset(top, offset + 1);
                                (1, 1)
                            }
                        }
                    },
                    PushBig {..} => {
                        allocator.spill_forget(&mut self.ops);
                        epilogue!(self.ops, stack_effect, command_index);
                        break;
                    },
                    Duplicate => { // note: I tried optimizing dup -> jz and dup -> jn but this actually benchmarked slower on my machine.
                        let mut lo = 0;
                        let mut hi = 0;
                        allocator.stage(&mut self.ops).load(&mut lo, offset).free(&mut hi).finish();
                        dynasm!(self.ops
                            ; mov Rq(hi), Rq(lo)
                        );
                        allocator.set_offset(hi, offset + 1);
                        (1, 2)
                    },
                    Swap => {
                        let mut lo = 0;
                        let mut hi = 0;
                        allocator.stage(&mut self.ops).load(&mut lo, offset - 1).load(&mut hi, offset).finish();
                        allocator.set_offset(lo, offset);
                        allocator.set_offset(hi, offset - 1);
                        (0, 2)
                    },
                    Copy {index} => {
                        // load the source into a register and copy it into a register for the
                        // new top of the stack. this might cost an extra register allocation
                        // and a move if the source value wasn't already in a register, but
                        // mov's are basically free anyway.
                        let mut src = 0;
                        let mut dest = 0;
                        allocator.stage(&mut self.ops).load(&mut src, offset - index as i32).free(&mut dest).finish();
                        dynasm!(self.ops
                            ; mov Rq(dest), Rq(src)
                        );
                        allocator.set_offset(dest, offset + 1);

                        // if index is 0 we need at least 1 previously existing item on the
                        // stack, and so on. another 1 is added because stack_extra is relative
                        // to the stack depth after this command's stack effect.
                        (1, 2 + index as i32)
                    },
                    Discard => {
                        allocator.forget_offsets(|x| x == offset);
                        (-1, 0)
                    },
                    Slide {amount} => {
                        let mut top = 0;
                        allocator.stage(&mut self.ops).load(&mut top, offset).finish();
                        allocator.forget_offsets(|x| x >= (offset - amount as i32) && x != offset);
                        allocator.set_offset(top, offset - amount as i32);
                        (-(amount as i32), 1)
                    },
                    Add => {
                        let mut left = 0;
                        let mut right = 0;
                        allocator.stage(&mut self.ops).load(&mut left, offset - 1).load(&mut right, offset).finish();
                        dynasm!(self.ops
                            ; add Rq(left), Rq(right)
                        );

                        if !self.options.contains(Options::IGNORE_OVERFLOW) {
                            dynasm!(self.ops
                                ; jno >overflow
                                ; sub Rq(left), Rq(right)
                                ;; allocator.spill_error(&mut self.ops)
                                ;; epilogue!(self.ops, stack_effect, command_index)
                                ;overflow:
                            );
                        }
                        allocator.modify(left);
                        allocator.forget(right);
                        (-1, 1)
                    },
                    Subtract => {
                        let mut left = 0;
                        let mut right = 0;
                        allocator.stage(&mut self.ops).load(&mut left, offset - 1).load(&mut right, offset).finish();
                        dynasm!(self.ops
                            ; sub Rq(left), Rq(right)
                        );

                        if !self.options.contains(Options::IGNORE_OVERFLOW) {
                            dynasm!(self.ops
                                ; jno >overflow
                                ; add Rq(left), Rq(right)
                                ;; allocator.spill_error(&mut self.ops)
                                ;; epilogue!(self.ops, stack_effect, command_index)
                                ;overflow:
                            );
                        }
                        allocator.modify(left);
                        allocator.forget(right);
                        (-1, 1)
                    },
                    Multiply => {
                        if !self.options.contains(Options::IGNORE_OVERFLOW) {
                            let mut left = 0;
                            let mut right = 0;
                            let mut res = 0;
                            allocator.stage(&mut self.ops).load(&mut left, offset - 1).load(&mut right, offset).free(&mut res).finish();
                            dynasm!(self.ops
                                ; mov Rq(res), Rq(left)
                                ; imul Rq(res), Rq(right)
                                ; jno >overflow
                                ;; allocator.spill_error(&mut self.ops)
                                ;; epilogue!(self.ops, stack_effect, command_index)
                                ;overflow:
                            );
                            allocator.forget(left);
                            allocator.forget(right);
                            allocator.set_offset(res, offset - 1);
                        } else {
                            let mut left = 0;
                            let mut right = 0;
                            allocator.stage(&mut self.ops).load(&mut left, offset - 1).load(&mut right, offset).finish();
                            dynasm!(self.ops
                                ; imul Rq(left), Rq(right)
                            );
                            allocator.forget(right);
                            allocator.modify(left);
                        }
                        (-1, 1)
                    },
                    Divide => {
                        let mut left = 0;
                        let mut right = 0;
                        allocator.stage(&mut self.ops).load(&mut left, offset - 1).load(&mut right, offset).finish();
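                        // guard the two operand combinations that would make idiv fault:
                        // division by zero and i64::MIN / -1 (the only overflowing case).
                        // both spill and bail back to the interpreter instead of trapping.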
                        dynasm!(self.ops
                            ; cmp Rq(right), BYTE 0
                            ; je BYTE >error
                            ; cmp Rq(right), BYTE -1
                            ; jne >correct
                            ; mov temp0, QWORD i64::MIN
                            ; cmp Rq(left), temp0
                            ; jne >correct
                            ;error:
                            ;;allocator.spill_error(&mut self.ops)
                            ;;epilogue!(self.ops, stack_effect, command_index)
                            ;correct:
                            ; mov rax, Rq(left)
                            ; cqo
                            ; idiv Rq(right)
                            ; mov stack, [rsp + 0x38]
                            ; mov Rq(left), rax 
                        );
                        allocator.modify(left);
                        allocator.forget(right);
                        (-1, 1)
                    },
                    Modulo => {
                        let mut left = 0;
                        let mut right = 0;
                        allocator.stage(&mut self.ops).load(&mut left, offset - 1).load(&mut right, offset).finish();
                        dynasm!(self.ops
                            ; cmp Rq(right), BYTE 0
                            ; je BYTE >error
                            ; cmp Rq(right), BYTE -1
                            ; jne >correct
                            ; mov temp0, QWORD i64::MIN
                            ; cmp Rq(left), temp0
                            ; jne >correct
                            ;error:
                            ;; allocator.spill_error(&mut self.ops)
                            ;; epilogue!(self.ops, stack_effect, command_index)
                            ;correct:
                            ; mov rax, Rq(left)
                            ; cqo
                            ; idiv Rq(right)
                            ; mov Rq(left), rdx
                            ; mov stack, [rsp + 0x38]
                        );
                        allocator.modify(left);
                        allocator.forget(right);
                        (-1, 1)
                    },
                    Set => {
                        let mut key = 0;
                        let mut temp1 = 0;
                        // the key is not flushed as we pass it in a register; the value is
                        // flushed as cache_evict may need to read it from the stack.
                        allocator.stage(&mut self.ops).load(&mut key, offset - 1).free(&mut temp1).finish();
                        allocator.forget(key);
                        allocator.spill_forget(&mut self.ops);

                        dynasm!(self.ops
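                            // The heap cache is a direct-mapped array of 16-byte CacheEntry
                            // {key, value} slots indexed by key & CACHE_MASK. Keys are stored
                            // tagged as `key | 1`, so an all-zero key marks an empty slot; a
                            // non-empty mismatch means another key lives there and cache_evict
                            // moves that entry into the backing hashmap before claiming the slot.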
                            // calculate cache entry location
                            ; mov temp0, Rq(key)
                            ; and temp0, CACHE_MASK as i32
                            ; shl temp0, 4 // mul sizeof<CacheEntry>
                            ; add temp0, state => JitState.heap_cache
                            // key | 1
                            ; mov Rq(temp1), Rq(key)
                            ; or  Rq(temp1), BYTE 1
                            // if entry.key == key | 1
                            ; cmp temp0 => CacheEntry.key, Rq(temp1)
                            ; je >equal
                            // if entry.key == 0
                            ; cmp QWORD temp0 => CacheEntry.key, BYTE 0
                            ; je >zero
                            // cache_evict(state, stack, *entry, key)
                            ; mov r9, Rq(key)
                            ; mov r8, temp0
                            // pushes the old entry into the hashmap, and puts the new entry (key from register, value from stack) into storage
                            ;;call_extern!(self.ops, cache_evict, offset)
                            ; jmp >end
                            // entry.key = key | 1
                            ;zero:
                            ; mov temp0 => CacheEntry.key, Rq(temp1)
                            // and finally copy the value over
                            ;equal:
                            ; mov Rq(temp1), stack => Integer[offset]
                            ; mov temp0 => CacheEntry.value, Rq(temp1)
                            ;end:
                        );
                        (-2, 0)
                    },
                    Get => {
                        let mut key = 0;
                        // spill everything while loading the key and getting a temp reg
                        allocator.stage(&mut self.ops).load(&mut key, offset).finish();
                        allocator.spill_forget(&mut self.ops);

                        dynasm!(self.ops
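                            // On a cache hit the value is copied straight onto the stack. On a
                            // miss, cache_bypass_get consults the backing hashmap; a non-zero
                            // result in al signals a failed lookup, in which case (unless
                            // UNCHECKED_HEAP is set) we bail back to the interpreter.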
                            // calculate cache entry location
                            ; mov temp0, Rq(key)
                            ; and temp0, CACHE_MASK as i32
                            ; shl temp0, 4 // mul sizeof<CacheEntry>
                            ; add temp0, state => JitState.heap_cache
                            // if entry.key == key | 1
                            ; or  Rq(key), BYTE 1
                            ; cmp temp0 => CacheEntry.key, Rq(key)
                            ; je >equal
                            // not in cache
                            ;;call_extern!(self.ops, cache_bypass_get, offset)
                        );
                        if !self.options.contains(Options::UNCHECKED_HEAP) {
                            dynasm!(self.ops
                                ; test al, al
                                ; jz BYTE >end
                                ;;epilogue!(self.ops, stack_effect, command_index)
                            );
                        } else {
                            dynasm!(self.ops
                                ; jmp BYTE >end
                            );
                        }
                        dynasm!(self.ops
                            // (a key that is also missing from the hashmap takes the error
                            // branch above)
                            ;equal:
                            // read the value from cache and put it on top of the stack
                            ; mov temp0, temp0 => CacheEntry.value
                            ; mov stack => Integer[offset], temp0
                            ;end:
                        );
                        (0, 1)
                    },
                    // we're done here
                    Label => {
                        allocator.spill_forget(&mut self.ops);
                        dynasm!(self.ops
                            ; add QWORD state => JitState.stack_change, DWORD stack_effect
                        );
                        let target = command_index + 1;
                        if let Some(block) = self.blocks.get(&target) {
                            dynasm!(self.ops
                                ; jmp =>block.chained
                            );
                        } else {
                            let start = self.ops.offset();
                            epilogue!(self.ops, , target);
                            Self::add_fixup(&mut self.fixups, target, FixUp::Jump(start, self.ops.offset()));
                        }
                        break;
                    },
                    Call {index} => {
                        allocator.spill_forget(&mut self.ops);
                        if let Some(block) = self.blocks.get(&(command_index + 1)) {
                            dynasm!(self.ops
                                ; lea r9, [=>block.chained]
                            );
                        } else {
                            let start = self.ops.offset();
                            dynasm!(self.ops
                                ; lea r9, [->buffer_base]
                            );
                            Self::add_fixup(&mut self.fixups, command_index + 1, FixUp::Lea(start, self.ops.offset()));
                        }
                        dynasm!(self.ops
                            ; lea temp0, [->buffer_base]
                            ; sub r9, temp0
                            ; mov r8, command_index as i32 + 1
                            ;; call_extern!(self.ops, call, offset)
                            ; add QWORD state => JitState.stack_change, DWORD stack_effect
                        );
                        if let Some(block) = self.blocks.get(&index) {
                            dynasm!(self.ops
                                ; jmp =>block.chained
                            );
                        } else {
                            let start = self.ops.offset();
                            epilogue!(self.ops, , index);
                            Self::add_fixup(&mut self.fixups, index, FixUp::Jump(start, self.ops.offset()));
                        }
                        break;
                    },
                    Jump {index} => {
                        allocator.spill_forget(&mut self.ops);
                        dynasm!(self.ops
                            ; add QWORD state => JitState.stack_change, DWORD stack_effect
                        );
                        if let Some(block) = self.blocks.get(&index) {
                            dynasm!(self.ops
                                ; jmp =>block.chained
                            );
                        } else {
                            let start = self.ops.offset();
                            epilogue!(self.ops, , index);
                            Self::add_fixup(&mut self.fixups, index, FixUp::Jump(start, self.ops.offset()));
                        }
                        break;
                    },
                    JumpIfZero {index} => {
                        let mut top = 0;
                        allocator.stage(&mut self.ops).load(&mut top, offset).finish();
                        dynasm!(self.ops
                            ; cmp Rq(top), BYTE 0
                            ; jnz >no_branch
                            ;; allocator.spill_error(&mut self.ops)
                            ; add QWORD state => JitState.stack_change, DWORD stack_effect - 1 // we pop a value off before returning
                        );
                        if let Some(block) = self.blocks.get(&index) {
                            dynasm!(self.ops
                                ; jmp =>block.chained
                            );
                        } else {
                            let start = self.ops.offset();
                            epilogue!(self.ops, , index);
                            Self::add_fixup(&mut self.fixups, index, FixUp::Jump(start, self.ops.offset()));
                        }
                        dynasm!(self.ops
                            ;no_branch:
                        );
                        allocator.forget(top);
                        (-1, 0)
                    },
                    JumpIfNegative {index} => {
                        let mut top = 0;
                        allocator.stage(&mut self.ops).load(&mut top, offset).finish();
                        dynasm!(self.ops
                            ; cmp Rq(top), BYTE 0
                            ; jge >no_branch
                            ;; allocator.spill_error(&mut self.ops)
                            ; add QWORD state => JitState.stack_change, DWORD stack_effect - 1 // we pop a value off before returning
                        );
                        if let Some(block) = self.blocks.get(&index) {
                            dynasm!(self.ops
                                ; jmp =>block.chained
                            );
                        } else {
                            let start = self.ops.offset();
                            epilogue!(self.ops, , index);
                            Self::add_fixup(&mut self.fixups, index, FixUp::Jump(start, self.ops.offset()));
                        }
                        dynasm!(self.ops
                            ;no_branch:
                        );
                        allocator.forget(top);
                        (-1, 0)
                    },
                    EndSubroutine => { // we have to dynamically determine if we're going back to the interpreter or compiled code
                        allocator.spill_forget(&mut self.ops);
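                        // `ret` hands back either the code offset (relative to ->buffer_base)
                        // of already-compiled caller code to jump to, or 0 together with (via
                        // [rsp + 0x48]) the command index at which the interpreter resumes.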
                        dynasm!(self.ops
                            ; add QWORD state => JitState.stack_change, DWORD stack_effect
                            ; mov r8, command_index as i32
                            ; lea r9, [rsp + 0x48]
                            ;; call_extern!(self.ops, ret, offset)
                            ; test retval, retval
                            ; jz >interpret
                            ; lea r9, [->buffer_base]
                            ; add r9, retval
                            ; jmp r9
                            ;interpret:
                            ; mov retval, [rsp + 0x48]
                            ;; epilogue!(self.ops)
                        );
                        break;
                    },
                    EndProgram => {
                        allocator.spill_forget(&mut self.ops);
                        epilogue!(self.ops, stack_effect, command_index);
                        break;
                    },
                    PrintChar => {
                        allocator.spill_forget(&mut self.ops);
                        dynasm!(self.ops
                            ;; call_extern!(self.ops, print_char, offset)
                            ; test al, al
                            ; jz >io_fail
                            ;; epilogue!(self.ops, stack_effect, command_index)
                            ;io_fail:
                        );
                        (-1, 0)
                    },
                    PrintNum => {
                        allocator.spill_forget(&mut self.ops);
                        dynasm!(self.ops
                            ;; call_extern!(self.ops, print_num, offset)
                            ; test al, al
                            ; jz >io_fail
                            ;; epilogue!(self.ops, stack_effect, command_index)
                            ;io_fail:
                        );
                        (-1, 0)
                    },
                    InputChar => {
                        allocator.spill_forget(&mut self.ops);
                        dynasm!(self.ops
                            ;; call_extern!(self.ops, input_char, offset)
                            ; test al, al
                            ; jz >io_fail
                            ;; epilogue!(self.ops, stack_effect, command_index)
                            ;io_fail:
                        );
                        (-1, 0)
                    },
                    InputNum => {
                        allocator.spill_forget(&mut self.ops);
                        epilogue!(self.ops, stack_effect, command_index);
                        break;
                    }
                };

                stack_effect += stack_change;
                let stack_bot = stack_effect - stack_extra;

                max_stack = max(max_stack, stack_effect);
                min_stack = min(min_stack, stack_bot);

                command_index += 1;
            } else {
                // we hit program end. uh, okay I guess.
                allocator.spill_forget(&mut self.ops);
                epilogue!(self.ops, stack_effect, command_index);
                break;
            }
        }

        let mut ops = self.ops.alter_uncommitted();
        ops.goto(stack_fixes);
        dynasm!(ops
            ; mov rdx, -min_stack
            ; mov r8, max_stack
        );

        // register fixups for when a commit is made
        self.fixup_queue.push((start_index, block.chained));

        Ok(block)
    }

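    /// Commits all assembled code to the executable buffer and patches any recorded
    /// fixups, so placeholder epilogues and `lea`s emitted for not-yet-compiled blocks
    /// now point directly at the compiled code.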
    pub fn commit(&mut self) {
        self.ops.commit().unwrap();
        
        if !self.fixup_queue.is_empty() {
            let fixup_queue = &mut self.fixup_queue;
            let fixups = &mut self.fixups;

            self.ops.alter(|ops| {
                for (target, label) in fixup_queue.drain(..) {
                    if let Some(mut fixups) = fixups.remove(&target) {
                        for fixup in fixups.drain(..) {
                            match fixup {
                                FixUp::Jump(start, end) => dynasm!(ops
                                    ;; ops.goto(start)
                                    ; jmp =>label
                                    ;; ops.check(end).unwrap()
                                ),
                                FixUp::Lea(start, end) => dynasm!(ops
                                    ;; ops.goto(start)
                                    ; lea r9, [=>label]
                                    ;; ops.check(end).unwrap()
                                )
                            }
                        }
                    }
                }
            }).unwrap();
        }
    }

    fn add_fixup(fixups: &mut HashMap<usize, Vec<FixUp>>, target: usize, fixup: FixUp) {
        fixups.entry(target).or_insert_with(Vec::new).push(fixup);
    }

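    /// Compiles the block starting at `target` if it has not been compiled yet and
    /// returns its starting offset, or `None` if it already existed. `commit` still
    /// has to be called before the new code can be executed.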
    pub fn compile_index(&mut self, target: usize) -> Option<AssemblyOffset> {
        if !self.blocks.contains_key(&target) {
            let block = self.compile(target).unwrap();
            Some(block.start)
        } else {
            None
        }
    }

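    /// Returns an `Executor` that can be used to read (and run code from) the
    /// assembled buffer, as done in `debug_compile`.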
    pub fn executor(&self) -> dynasmrt::Executor {
        self.ops.reader()
    }
}