comparison m68k_core_x86.c @ 2350:f8b5142c06aa

Allow 68K to return mid-instruction. Adjust how 68K interrupt ack works so int2 busy flag timing is more correct. Fix some other SCD timing issues
author Michael Pavone <pavone@retrodev.com>
date Mon, 16 Oct 2023 23:30:04 -0700
parents f0fc6c09517d
children bf4f1a8d1d48
2349:f0fc6c09517d 2350:f8b5142c06aa
2582 2582
2583 2583
2584 call(&native, opts->bp_stub); 2584 call(&native, opts->bp_stub);
2585 } 2585 }
2586 2586
2587 void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider, sync_fun sync_components) 2587 void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider, sync_fun sync_components, int_ack_fun int_ack)
2588 { 2588 {
2589 memset(opts, 0, sizeof(*opts)); 2589 memset(opts, 0, sizeof(*opts));
2590 opts->gen.memmap = memmap; 2590 opts->gen.memmap = memmap;
2591 opts->gen.memmap_chunks = num_chunks; 2591 opts->gen.memmap_chunks = num_chunks;
2592 opts->gen.address_size = SZ_D; 2592 opts->gen.address_size = SZ_D;
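The widened init_m68k_opts signature above is where the new int_ack hook comes in, alongside the existing sync_components callback. Its actual typedef lives in the core header rather than in this file; judging only from the call sites later in this diff (sync_components is called with the context pointer plus a zeroed second argument, int_ack with just the context pointer, and in both cases the generated code copies RAX back into the context register afterward), the shapes are presumably along these lines:

    #include <stdint.h>

    /* Inferred sketch only, not copied from m68k_core.h: both callbacks hand back
     * the (possibly relocated) context pointer, which the emitted code reloads
     * from RAX after the call. Parameter names are guesses. */
    typedef struct m68k_context m68k_context;
    typedef m68k_context *(*sync_fun)(m68k_context *context, uint32_t address);
    typedef m68k_context *(*int_ack_fun)(m68k_context *context);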
2634 opts->gen.cycles = RAX; 2634 opts->gen.cycles = RAX;
2635 opts->gen.limit = RBP; 2635 opts->gen.limit = RBP;
2636 opts->gen.scratch1 = RCX; 2636 opts->gen.scratch1 = RCX;
2637 opts->gen.align_error_mask = 1; 2637 opts->gen.align_error_mask = 1;
2638 opts->sync_components = sync_components; 2638 opts->sync_components = sync_components;
2639 opts->int_ack = int_ack;
2639 2640
2640 2641
2641 opts->gen.native_code_map = malloc(sizeof(native_map_slot) * NATIVE_MAP_CHUNKS); 2642 opts->gen.native_code_map = malloc(sizeof(native_map_slot) * NATIVE_MAP_CHUNKS);
2642 memset(opts->gen.native_code_map, 0, sizeof(native_map_slot) * NATIVE_MAP_CHUNKS); 2643 memset(opts->gen.native_code_map, 0, sizeof(native_map_slot) * NATIVE_MAP_CHUNKS);
2643 opts->gen.deferred = NULL; 2644 opts->gen.deferred = NULL;
2647 memset(opts->gen.ram_inst_sizes, 0, inst_size_size); 2648 memset(opts->gen.ram_inst_sizes, 0, inst_size_size);
2648 2649
2649 code_info *code = &opts->gen.code; 2650 code_info *code = &opts->gen.code;
2650 init_code_info(code); 2651 init_code_info(code);
2651 2652
2653 opts->save_context_scratch = code->cur;
2654 mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, scratch1), SZ_D);
2655 mov_rrdisp(code, opts->gen.scratch2, opts->gen.context_reg, offsetof(m68k_context, scratch2), SZ_D);
2652 opts->gen.save_context = code->cur; 2656 opts->gen.save_context = code->cur;
2653 for (int i = 0; i < 5; i++) 2657 for (int i = 0; i < 5; i++)
2654 if (opts->flag_regs[i] >= 0) { 2658 if (opts->flag_regs[i] >= 0) {
2655 mov_rrdisp(code, opts->flag_regs[i], opts->gen.context_reg, offsetof(m68k_context, flags) + i, SZ_B); 2659 mov_rrdisp(code, opts->flag_regs[i], opts->gen.context_reg, offsetof(m68k_context, flags) + i, SZ_B);
2656 } 2660 }
2664 } 2668 }
2665 } 2669 }
2666 mov_rrdisp(code, opts->gen.cycles, opts->gen.context_reg, offsetof(m68k_context, current_cycle), SZ_D); 2670 mov_rrdisp(code, opts->gen.cycles, opts->gen.context_reg, offsetof(m68k_context, current_cycle), SZ_D);
2667 retn(code); 2671 retn(code);
2668 2672
2673 opts->load_context_scratch = code->cur;
2674 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, scratch1), opts->gen.scratch1, SZ_D);
2675 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, scratch2), opts->gen.scratch2, SZ_D);
2669 opts->gen.load_context = code->cur; 2676 opts->gen.load_context = code->cur;
2670 for (int i = 0; i < 5; i++) 2677 for (int i = 0; i < 5; i++)
2671 { 2678 {
2672 if (opts->flag_regs[i] >= 0) { 2679 if (opts->flag_regs[i] >= 0) {
2673 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, flags) + i, opts->flag_regs[i], SZ_B); 2680 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, flags) + i, opts->flag_regs[i], SZ_B);
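The two new entry points, save_context_scratch and load_context_scratch, are emitted immediately in front of the ordinary save_context/load_context stubs and fall through into them after also spilling or reloading scratch1 and scratch2 via the context. This matters because the core can now be suspended in the middle of a translated instruction, while those scratch registers still hold a live address or partially transferred value; the old do_sync kept them on the host stack instead, which would not survive a return out of the generated code. In C terms the extra prefix amounts to roughly this (a sketch; the real stubs are emitted x86):

    #include <stdint.h>

    /* minimal stand-in for the two context fields the new stubs touch */
    typedef struct { uint32_t scratch1, scratch2; } toy_scratch_fields;

    static void save_context_scratch_equiv(toy_scratch_fields *ctx, uint32_t s1, uint32_t s2)
    {
        ctx->scratch1 = s1;  /* spill the live scratch registers... */
        ctx->scratch2 = s2;
        /* ...then the ordinary save_context body runs (flags, registers, cycle count) */
    }

    static void load_context_scratch_equiv(const toy_scratch_fields *ctx, uint32_t *s1, uint32_t *s2)
    {
        *s1 = ctx->scratch1; /* reload them on the way back in... */
        *s2 = ctx->scratch2;
        /* ...followed by the ordinary load_context body */
    }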
2697 } 2704 }
2698 #else 2705 #else
2699 mov_rdispr(code, RSP, 20, opts->gen.scratch2, SZ_D); 2706 mov_rdispr(code, RSP, 20, opts->gen.scratch2, SZ_D);
2700 mov_rdispr(code, RSP, 24, opts->gen.context_reg, SZ_D); 2707 mov_rdispr(code, RSP, 24, opts->gen.context_reg, SZ_D);
2701 #endif 2708 #endif
2709 movzx_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, stack_storage_count), opts->gen.scratch1, SZ_B, SZ_D);
2710 mov_rrdisp(code, RSP, opts->gen.context_reg, offsetof(m68k_context, host_sp_entry), SZ_PTR);
2711 cmp_ir(code, 0, opts->gen.scratch1, SZ_D);
2712 code_ptr normal_start = code->cur + 1;
2713 jcc(code, CC_Z, normal_start);
2714 uint32_t stack_off_save = code->stack_off;
2715 mov_rr(code, opts->gen.context_reg, opts->gen.scratch2, SZ_PTR);
2716 #ifdef X86_64
2717 shl_ir(code, 3, opts->gen.scratch1, SZ_D);
2718 #else
2719 shl_ir(code, 2, opts->gen.scratch1, SZ_D);
2720 #endif
2721 add_ir(code, offsetof(m68k_context, stack_storage) - sizeof(void *), opts->gen.scratch2, SZ_PTR);
2722 add_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_PTR);
2723 code_ptr loop_top = code->cur;
2724 cmp_ir(code, 0, opts->gen.scratch1, SZ_D);
2725 code_ptr loop_bot = code->cur + 1;
2726 jcc(code, CC_Z, loop_bot);
2727 sub_ir(code, sizeof(void*), opts->gen.scratch1, SZ_D);
2728 mov_rindr(code, opts->gen.scratch2, opts->gen.cycles, SZ_PTR);
2729 sub_ir(code, sizeof(void*), opts->gen.scratch2, SZ_PTR);
2730 push_r(code, opts->gen.cycles);
2731 jmp(code, loop_top);
2732 *loop_bot = code->cur - (loop_bot + 1);
2733 call_noalign(code, opts->load_context_scratch);
2734 push_rdisp(code, opts->gen.context_reg, offsetof(m68k_context, resume_pc));
2735 retn(code);
2736
2737 code->stack_off = stack_off_save;
2738 *normal_start = code->cur - (normal_start + 1);
2702 call(code, opts->gen.load_context); 2739 call(code, opts->gen.load_context);
2703 call_r(code, opts->gen.scratch2); 2740 call_r(code, opts->gen.scratch2);
2704 call(code, opts->gen.save_context); 2741 call(code, opts->gen.save_context);
2742 mov_irdisp(code, 0, opts->gen.context_reg, offsetof(m68k_context, stack_storage_count), SZ_B);
2705 restore_callee_save_regs(code); 2743 restore_callee_save_regs(code);
2706 retn(code); 2744 retn(code);
2707 2745
2708 opts->native_addr = code->cur; 2746 opts->native_addr = code->cur;
2709 call(code, opts->gen.save_context); 2747 call(code, opts->gen.save_context);
2731 opts->gen.handle_cycle_limit = code->cur; 2769 opts->gen.handle_cycle_limit = code->cur;
2732 cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), opts->gen.cycles, SZ_D); 2770 cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), opts->gen.cycles, SZ_D);
2733 code_ptr skip_sync = code->cur + 1; 2771 code_ptr skip_sync = code->cur + 1;
2734 jcc(code, CC_C, code->cur + 2); 2772 jcc(code, CC_C, code->cur + 2);
2735 opts->do_sync = code->cur; 2773 opts->do_sync = code->cur;
2736 push_r(code, opts->gen.scratch1); 2774 call(code, opts->save_context_scratch);
2737 push_r(code, opts->gen.scratch2);
2738 call(code, opts->gen.save_context);
2739 xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D); 2775 xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D);
2740 call_args_abi(code, (code_ptr)opts->sync_components, 2, opts->gen.context_reg, opts->gen.scratch1); 2776 call_args_abi(code, (code_ptr)opts->sync_components, 2, opts->gen.context_reg, opts->gen.scratch1);
2741 mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR); 2777 mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR);
2742 call(code, opts->gen.load_context); 2778 cmp_irdisp(code, 0, RAX, offsetof(m68k_context, should_return), SZ_B);
2743 pop_r(code, opts->gen.scratch2); 2779 code_ptr do_return = code->cur + 1;
2744 pop_r(code, opts->gen.scratch1); 2780 jcc(code, CC_NZ, do_return);
2781 call(code, opts->load_context_scratch);
2745 *skip_sync = code->cur - (skip_sync+1); 2782 *skip_sync = code->cur - (skip_sync+1);
2746 retn(code); 2783 retn(code);
2747 2784 stack_off_save = code->stack_off;
2785 *do_return = code->cur - (do_return + 1);
2786 pop_r(code, opts->gen.scratch1);
2787 mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, resume_pc), SZ_PTR);
2788 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, host_sp_entry), opts->gen.scratch2, SZ_PTR);
2789 mov_rr(code, opts->gen.context_reg, opts->aregs[7], SZ_PTR);
2790 xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_B);
2791 add_ir(code, offsetof(m68k_context, stack_storage), opts->aregs[7], SZ_PTR);
2792 loop_top = code->cur;
2793 cmp_rr(code, opts->gen.scratch2, RSP, SZ_PTR);
2794 code_ptr done_stack_save = code->cur + 1;
2795 jcc(code, CC_Z, done_stack_save);
2796 pop_r(code, opts->gen.cycles);
2797 add_ir(code, 1, opts->gen.scratch1, SZ_B);
2798 mov_rrind(code, opts->gen.cycles, opts->aregs[7], SZ_PTR);
2799 add_ir(code, sizeof(void*), opts->aregs[7], SZ_PTR);
2800 jmp(code, loop_top);
2801 *done_stack_save = code->cur - (done_stack_save + 1);
2802 mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, stack_storage_count), SZ_B);
2803 restore_callee_save_regs(code);
2804 retn(code);
2805 code->stack_off = stack_off_save;
2806
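Taken together, the restore loop in the entry stub further up (guarded by stack_storage_count) and the save loop in the do_return path just above implement the "return mid-instruction" feature from the commit message: when sync_components sets should_return, the generated code pops the native return address into resume_pc, copies every host-stack word pushed since entry (down to host_sp_entry) into the context's stack_storage, and returns to the C caller; the next call back into the core pushes those words back in their original order, reloads the context including the scratch registers, and jumps to resume_pc. A rough, self-contained C model of that round trip, with the host stack played by an array (the context field names match the offsetof() uses above; everything else is illustrative):

    #include <stdint.h>

    #define TOY_STACK_WORDS 16

    /* minimal stand-in for the context fields used by the suspend/resume code */
    typedef struct {
        uintptr_t stack_storage[TOY_STACK_WORDS];
        uint8_t   stack_storage_count;
        uintptr_t resume_pc;
    } toy_context;

    /* do_return path: pop the resume address, then copy everything between the
     * current stack pointer and the stack pointer recorded at entry */
    static void toy_suspend(toy_context *ctx, uintptr_t **sp, uintptr_t *sp_entry)
    {
        ctx->resume_pc = *(*sp)++;                  /* pop_r(code, scratch1) */
        uint8_t count = 0;
        while (*sp != sp_entry) {
            ctx->stack_storage[count++] = *(*sp)++; /* pop_r + mov_rrind loop */
        }
        ctx->stack_storage_count = count;
    }

    /* entry path: push the saved words back, last one first so they land in
     * their original order, then report where execution should resume
     * (the real code clears stack_storage_count later, on a normal exit) */
    static uintptr_t toy_resume(toy_context *ctx, uintptr_t **sp)
    {
        for (uint8_t i = ctx->stack_storage_count; i > 0; i--) {
            *--(*sp) = ctx->stack_storage[i - 1];
        }
        return ctx->resume_pc;                      /* push_rdisp + retn */
    }

Note that host_sp_entry is recorded unconditionally on every entry, so the save loop always has a well-defined stopping point.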
2748 opts->gen.handle_code_write = (code_ptr)m68k_handle_code_write; 2807 opts->gen.handle_code_write = (code_ptr)m68k_handle_code_write;
2749 2808
2750 check_alloc_code(code, 256); 2809 check_alloc_code(code, 256);
2751 opts->gen.handle_align_error_write = code->cur; 2810 opts->gen.handle_align_error_write = code->cur;
2752 code->cur += 256; 2811 code->cur += 256;
3105 cycles(&opts->gen, 6); 3164 cycles(&opts->gen, 6);
3106 //save SR to stack 3165 //save SR to stack
3107 areg_to_native(opts, 7, opts->gen.scratch2); 3166 areg_to_native(opts, 7, opts->gen.scratch2);
3108 call(code, opts->write_16); 3167 call(code, opts->write_16);
3109 //interrupt ack cycle 3168 //interrupt ack cycle
3110 //the Genesis responds to these exclusively with !VPA which means it's a slow 3169 cycles(&opts->gen, 4); //base interrupt ack cycle count
3111 //6800 operation. documentation says these can take between 10 and 19 cycles. 3170 call(code, opts->gen.save_context);
3112 //actual measurements suggest it's really between 9 and 18 3171 call_args_abi(code, (code_ptr)opts->int_ack, 1, opts->gen.context_reg);
3113 //WARNING: this code might break with register assignment changes 3172 mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR);
3114 //save RDX 3173 call(code, opts->gen.load_context);
3115 push_r(code, RDX); 3174 cycles(&opts->gen, 4); //idle period after int ack
3116 //save cycle count
3117 mov_rr(code, RAX, opts->gen.scratch1, SZ_D);
3118 //clear top doubleword of dividend
3119 xor_rr(code, RDX, RDX, SZ_D);
3120 //set divisor to clock divider
3121 mov_ir(code, opts->gen.clock_divider, opts->gen.scratch2, SZ_D);
3122 div_r(code, opts->gen.scratch2, SZ_D);
3123 //discard remainder
3124 xor_rr(code, RDX, RDX, SZ_D);
3125 //set divisor to 10, the period of E
3126 mov_ir(code, 10, opts->gen.scratch2, SZ_D);
3127 div_r(code, opts->gen.scratch2, SZ_D);
3128 //delay will be (9 + 4 + the remainder) * clock_divider
3129 //the extra 4 is to cover the idle bus period after the ack
3130 add_ir(code, 9 + 4, RDX, SZ_D);
3131 mov_ir(code, opts->gen.clock_divider, RAX, SZ_D);
3132 mul_r(code, RDX, SZ_D);
3133 pop_r(code, RDX);
3134 //add saved cycle count to result
3135 add_rr(code, opts->gen.scratch1, RAX, SZ_D);
3136 3175
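For reference, the deleted sequence above computed the interrupt-acknowledge delay inline, per its own comments: the Genesis answers the ack with !VPA, making it a 6800-style E-clock transfer, so the cost depends on where the CPU currently sits within the 10-cycle E period, plus 4 idle bus cycles after the ack, all scaled back to master clocks by clock_divider. A plain-C rendering of the removed div/mul arithmetic (a sketch; the new code instead charges a flat 4 + 4 CPU cycles here and presumably leaves the E-clock/VPA delay and any system-specific bookkeeping, such as the Sega CD int2 busy flag mentioned in the commit message, to the int_ack callback):

    #include <stdint.h>

    /* delay = (9 + 4 + (cpu_cycle % 10)) * clock_divider added to the current
     * master-clock count, matching the removed sequence and its comments */
    static uint32_t old_int_ack_target(uint32_t current_cycle, uint32_t clock_divider)
    {
        uint32_t cpu_cycle = current_cycle / clock_divider; /* master clocks -> CPU cycles */
        uint32_t e_phase   = cpu_cycle % 10;                /* position within the E period */
        return current_cycle + (9 + 4 + e_phase) * clock_divider;
    }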
3137 //update status register 3176 //update status register
3138 and_irdisp(code, 0x78, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B); 3177 and_irdisp(code, 0x78, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B);
3139 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_num), opts->gen.scratch1, SZ_B); 3178 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_num), opts->gen.scratch1, SZ_B);
3140 //clear trace pending flag 3179 //clear trace pending flag
3152 call(code, opts->write_32_lowfirst); 3191 call(code, opts->write_32_lowfirst);
3153 3192
3154 //grab saved interrupt number 3193 //grab saved interrupt number
3155 xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D); 3194 xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D);
3156 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_pending), opts->gen.scratch1, SZ_B); 3195 mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_pending), opts->gen.scratch1, SZ_B);
3157 //ack the interrupt (happens earlier on hardware, but shouldn't be an observable difference)
3158 mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, int_ack), SZ_W);
3159 //calculate the vector address 3196 //calculate the vector address
3160 shl_ir(code, 2, opts->gen.scratch1, SZ_D); 3197 shl_ir(code, 2, opts->gen.scratch1, SZ_D);
3161 add_ir(code, 0x60, opts->gen.scratch1, SZ_D); 3198 add_ir(code, 0x60, opts->gen.scratch1, SZ_D);
3162 //clear out pending flag 3199 //clear out pending flag
3163 mov_irdisp(code, INT_PENDING_NONE, opts->gen.context_reg, offsetof(m68k_context, int_pending), SZ_B); 3200 mov_irdisp(code, INT_PENDING_NONE, opts->gen.context_reg, offsetof(m68k_context, int_pending), SZ_B);
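The closing shift-and-add implements the 68000 autovector scheme: a level-n interrupt (the value grabbed from int_pending here) uses exception vector 24 + n, so its table entry lives at (24 + n) * 4 = 0x60 + 4n. As a quick check of the arithmetic:

    #include <stdint.h>

    /* autovector table address for interrupt level 1..7, mirroring the
     * shl_ir(..., 2, ...) / add_ir(..., 0x60, ...) pair above;
     * e.g. level 6, the Genesis vertical interrupt, gives vector 30 at 0x78 */
    static uint32_t autovector_address(uint32_t level)
    {
        return 0x60 + (level << 2);
    }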