# HG changeset patch # User Mike Pavone # Date 1374388828 25200 # Node ID e730fc04016947b82f572e5287938252b6271bfe # Parent 1e828ed04a7c9b77bdf253c3ac0161b782afb7ff Fix performance regression from stop instruction work diff -r 1e828ed04a7c -r e730fc040169 blastem.c --- a/blastem.c Fri Jul 19 22:44:00 2013 -0700 +++ b/blastem.c Sat Jul 20 23:40:28 2013 -0700 @@ -52,7 +52,7 @@ uint8_t block[SMD_BLOCK_SIZE]; filesize -= SMD_HEADER_SIZE; fseek(f, SMD_HEADER_SIZE, SEEK_SET); - + uint16_t * dst = cart; while (filesize > 0) { fread(block, 1, SMD_BLOCK_SIZE, f); @@ -139,15 +139,15 @@ if (next_hint < context->int_cycle) { context->int_cycle = next_hint; context->int_num = 4; - + } } } } context->target_cycle = context->int_cycle < context->sync_cycle ? context->int_cycle : context->sync_cycle; - /*printf("Cyc: %d, Trgt: %d, Int Cyc: %d, Int: %d, Mask: %X, V: %d, H: %d, HICount: %d, HReg: %d, Line: %d\n", - context->current_cycle, context->target_cycle, context->int_cycle, context->int_num, (context->status & 0x7), + /*printf("Cyc: %d, Trgt: %d, Int Cyc: %d, Int: %d, Mask: %X, V: %d, H: %d, HICount: %d, HReg: %d, Line: %d\n", + context->current_cycle, context->target_cycle, context->int_cycle, context->int_num, (context->status & 0x7), v_context->regs[REG_MODE_2] & 0x20, v_context->regs[REG_MODE_1] & 0x10, v_context->hint_counter, v_context->regs[REG_HINT], v_context->cycles / MCLKS_LINE);*/ } @@ -199,7 +199,7 @@ //printf("YM | Cycle: %d, bpos: %d, PSG | Cycle: %d, bpos: %d\n", gen->ym->current_cycle, gen->ym->buffer_pos, gen->psg->cycles, gen->psg->buffer_pos * 2); psg_run(gen->psg, target); ym_run(gen->ym, target); - + //printf("Target: %d, YM bufferpos: %d, PSG bufferpos: %d\n", target, gen->ym->buffer_pos, gen->psg->buffer_pos * 2); } @@ -221,7 +221,7 @@ } //printf("reached frame end | 68K Cycles: %d, MCLK Cycles: %d\n", context->current_cycle, mclks); vdp_run_context(v_context, mclks_per_frame); - + if (!headless) { break_on_sync |= wait_render_frame(v_context, frame_limit); } @@ -258,12 +258,6 @@ context->int_ack = 0; } adjust_int_cycle(context, v_context); - if (context->current_cycle <= context->sync_cycle) { - context->sync_cycle = context->current_cycle + 4; - if (context->sync_cycle < context->int_cycle) { - context->target_cycle = context->sync_cycle; - } - } if (break_on_sync && address) { break_on_sync = 0; debugger(context, address); @@ -504,7 +498,7 @@ } if (value & 1) { dputs("bus requesting Z80"); - + if(!reset && !busreq) { busack_cycle = ((gen->z80->current_cycle + Z80_ACK_DELAY) * MCLKS_PER_Z80) / MCLKS_PER_68K;//context->current_cycle + Z80_ACK_DELAY; new_busack = Z80_REQ_ACK; @@ -526,7 +520,7 @@ } //busack_cycle = CYCLE_NEVER; //busack = Z80_REQ_BUSY; - + } } else if (location == 0x1200) { sync_z80(gen->z80, context->current_cycle * MCLKS_PER_68K); @@ -1441,7 +1435,7 @@ //Z80 debug commands switch(input_buf[1]) { - case 'b': + case 'b': param = find_param(input_buf); if (!param) { fputs("zb command requires a parameter\n", stderr); @@ -1560,7 +1554,7 @@ context->flags[ZF_Z] = f & 1; f >>= 1; context->flags[ZF_S] = f; - + context->regs[Z80_A] = *curpos; curpos += 3; for (int reg = Z80_C; reg <= Z80_IYH; reg++) { @@ -1648,7 +1642,7 @@ adjust_int_cycle(gen->m68k, gen->vdp); fclose(gstfile); return pc; - + error_close: fclose(gstfile); error: @@ -1666,7 +1660,7 @@ const memmap_chunk static_map[] = { {0, 0x400000, 0xFFFFFF, 0, MMAP_READ, cart, NULL, NULL, NULL, NULL}, - {0xE00000, 0x1000000, 0xFFFF, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, ram, + {0xE00000, 0x1000000, 0xFFFF, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, ram, NULL, NULL, NULL, NULL}, {0xC00000, 0xE00000, 0x1FFFFF, 0, 0, NULL, (read_16_fun)vdp_port_read, (write_16_fun)vdp_port_write, @@ -1719,7 +1713,7 @@ memmap[0].mask = 0xFFFFFF; memmap[0].flags = MMAP_READ; memmap[0].buffer = cart; - + ram_start &= 0xFFFFFE; ram_end |= 1; memmap[1].start = ram_start; @@ -1736,7 +1730,7 @@ size /= 2; } memmap[1].buffer = gen->save_ram = malloc(size); - + memcpy(memmap+2, static_map+1, sizeof(static_map)-sizeof(static_map[0])); num_chunks = sizeof(static_map)/sizeof(memmap_chunk)+1; } else { @@ -1745,7 +1739,7 @@ memmap[0].mask = 0xFFFFFF; memmap[0].flags = MMAP_READ; memmap[0].buffer = cart; - + memmap[1].start = 0x200000; memmap[1].end = 0x400000; memmap[1].mask = 0x1FFFFF; @@ -1765,7 +1759,7 @@ memmap[num_chunks].end = 0xA13100; memmap[num_chunks].mask = 0xFF; memmap[num_chunks].write_16 = (write_16_fun)write_bank_reg_w; - memmap[num_chunks].write_8 = (write_8_fun)write_bank_reg_b; + memmap[num_chunks].write_8 = (write_8_fun)write_bank_reg_b; num_chunks++; ram_end++; size = ram_end-ram_start; @@ -1794,7 +1788,7 @@ init_x86_68k_opts(&opts, memmap, num_chunks); opts.address_log = address_log; init_68k_context(&context, opts.native_code_map, &opts); - + context.video_context = gen->vdp; context.system = gen; //cartridge ROM @@ -1998,15 +1992,15 @@ render_init(width, height, title, fps, fullscreen); } vdp_context v_context; - + init_vdp_context(&v_context); - + ym2612_context y_context; ym_init(&y_context, render_sample_rate(), fps == 60 ? MCLKS_NTSC : MCLKS_PAL, MCLKS_PER_YM, render_audio_buffer(), ym_log ? YM_OPT_WAVE_LOG : 0); - + psg_context p_context; psg_init(&p_context, render_sample_rate(), fps == 60 ? MCLKS_NTSC : MCLKS_PAL, MCLKS_PER_PSG, render_audio_buffer()); - + z80_context z_context; x86_z80_options z_opts; init_x86_z80_opts(&z_opts); @@ -2020,13 +2014,13 @@ z_context.sync_cycle = z_context.target_cycle = mclks_per_frame/MCLKS_PER_Z80; z_context.int_cycle = CYCLE_NEVER; z_context.mem_pointers[1] = z_context.mem_pointers[2] = (uint8_t *)cart; - + gen.z80 = &z_context; gen.vdp = &v_context; gen.ym = &y_context; gen.psg = &p_context; genesis = &gen; - + int fname_size = strlen(argv[1]); sram_filename = malloc(fname_size+6); memcpy(sram_filename, argv[1], fname_size); @@ -2041,7 +2035,7 @@ strcpy(sram_filename + fname_size, ".sram"); } set_keybindings(); - + init_run_cpu(&gen, debug, address_log, statefile); return 0; } diff -r 1e828ed04a7c -r e730fc040169 m68k_to_x86.c --- a/m68k_to_x86.c Fri Jul 19 22:44:00 2013 -0700 +++ b/m68k_to_x86.c Sat Jul 20 23:40:28 2013 -0700 @@ -123,9 +123,9 @@ //We only get one memory parameter, so if the dst operand is a register in memory, //we need to copy this to a temp register first reg = native_reg(&(inst->dst), opts); - if (reg >= 0 || inst->dst.addr_mode == MODE_UNUSED || !(inst->dst.addr_mode == MODE_REG || inst->dst.addr_mode == MODE_AREG) + if (reg >= 0 || inst->dst.addr_mode == MODE_UNUSED || !(inst->dst.addr_mode == MODE_REG || inst->dst.addr_mode == MODE_AREG) || inst->op == M68K_EXG) { - + ea->mode = MODE_REG_DISPLACE8; ea->base = CONTEXT; ea->disp = reg_offset(&(inst->src)); @@ -150,7 +150,7 @@ out = sub_irdisp8(out, dec_amount, CONTEXT, reg_offset(&(inst->src)), SZ_D); } case MODE_AREG_INDIRECT: - case MODE_AREG_POSTINC: + case MODE_AREG_POSTINC: if (opts->aregs[inst->src.params.regs.pri] >= 0) { out = mov_rr(out, opts->aregs[inst->src.params.regs.pri], SCRATCH1, SZ_D); } else { @@ -168,7 +168,7 @@ out = call(out, opts->read_32); break; } - + if (inst->src.addr_mode == MODE_AREG_POSTINC) { inc_amount = inst->extra.size == OPSIZE_WORD ? 2 : (inst->extra.size == OPSIZE_LONG ? 4 : (inst->src.params.regs.pri == 7 ? 2 : 1)); if (opts->aregs[inst->src.params.regs.pri] >= 0) { @@ -441,7 +441,7 @@ out = mov_rdisp8r(out, CONTEXT, reg_offset(&(inst->dst)), SCRATCH2, SZ_D); } } - + if (inst->dst.addr_mode == MODE_AREG_POSTINC) { inc_amount = inst->extra.size == OPSIZE_WORD ? 2 : (inst->extra.size == OPSIZE_LONG ? 4 : (inst->dst.params.regs.pri == 7 ? 2 : 1)); if (opts->aregs[inst->dst.params.regs.pri] >= 0) { @@ -781,7 +781,7 @@ dst = mov_ir(dst, 0, FLAG_V, SZ_B); dst = mov_ir(dst, 0, FLAG_C, SZ_B); } - + if (inst->dst.addr_mode != MODE_AREG) { if (src.mode == MODE_REG_DIRECT) { flags_reg = src.base; @@ -2459,7 +2459,7 @@ dst = pop_r(dst, SCRATCH2); dst = mov_rr(dst, reg, SCRATCH1, SZ_D); dst = shr_ir(dst, 16, SCRATCH1, SZ_D); - + } else { dst = mov_rdisp8r(dst, CONTEXT, reg_offset(&(inst->src))+3, SCRATCH1, SZ_B); dst = push_r(dst, SCRATCH2); @@ -2527,7 +2527,7 @@ dst = push_r(dst, SCRATCH1); dst = call(dst, opts->read_8); if (reg >= 0) { - + dst = shl_ir(dst, 8, SCRATCH1, SZ_W); dst = mov_rr(dst, SCRATCH1, reg, SZ_W); dst = pop_r(dst, SCRATCH1); @@ -2628,7 +2628,7 @@ } else { dst = mov_rdisp8r(dst, src_op->base, src_op->disp, RCX, SZ_B); } - + } dst = and_ir(dst, 63, RCX, SZ_D); nz_off = dst+1; @@ -2676,7 +2676,7 @@ if (inst->extra.size == OPSIZE_LONG) { uint8_t * neq_32_off = dst + 1; dst = jcc(dst, CC_NZ, dst+2); - + //set the carry bit to the lsb if (dst_op->mode == MODE_REG_DIRECT) { dst = special(dst, 1, dst_op->base, SZ_D); @@ -2703,7 +2703,7 @@ dst = shift_irdisp8(dst, 31, dst_op->base, dst_op->disp, inst->extra.size); dst = shift_irdisp8(dst, 1, dst_op->base, dst_op->disp, inst->extra.size); } - + } end_off = dst+1; dst = jmp(dst, dst+2); @@ -2715,7 +2715,7 @@ } } } - + } if (!special && end_off) { *end_off = dst - (end_off + 1); @@ -3084,7 +3084,7 @@ default: isize = 2; } - uint8_t * passed = dst+1; + uint8_t * passed = dst+1; dst = jcc(dst, CC_GE, dst+2); dst = mov_ir(dst, 1, FLAG_N, SZ_B); dst = mov_ir(dst, VECTOR_CHK, SCRATCH2, SZ_D); @@ -3322,7 +3322,7 @@ } dst = call(dst, (uint8_t *)(inst->op == M68K_MOVE_SR ? set_sr : set_ccr)); dst = cycles(dst, 12); - + } break; case M68K_MOVE_USP: @@ -3446,7 +3446,7 @@ dst = not_rdisp8(dst, dst_op.base, dst_op.disp, inst->extra.size); dst = cmp_irdisp8(dst, 0, dst_op.base, dst_op.disp, inst->extra.size); } - + dst = mov_ir(dst, 0, FLAG_C, SZ_B); dst = setcc_r(dst, CC_Z, FLAG_Z); dst = setcc_r(dst, CC_S, FLAG_N); @@ -3800,7 +3800,15 @@ } uint8_t * loop_top = dst; dst = call(dst, (uint8_t *)do_sync); + dst = cmp_rr(dst, LIMIT, CYCLES, SZ_D); + uint8_t * normal_cycle_up = dst + 1; + dst = jcc(dst, CC_A, dst+2); + dst = cycles(dst, BUS); + uint8_t * after_cycle_up = dst + 1; + dst = jmp(dst, dst+2); + *normal_cycle_up = dst - (normal_cycle_up + 1); dst = mov_rr(dst, LIMIT, CYCLES, SZ_D); + *after_cycle_up = dst - (after_cycle_up+1); dst = cmp_rdisp8r(dst, CONTEXT, offsetof(m68k_context, int_cycle), CYCLES, SZ_D); dst = jcc(dst, CC_C, loop_top); break; @@ -3867,7 +3875,7 @@ dst = rol_irdisp8(dst, 16, src_op.base, src_op.disp, SZ_D); dst = cmp_irdisp8(dst, 0, src_op.base, src_op.disp, SZ_D); } - + dst = mov_ir(dst, 0, FLAG_C, SZ_B); dst = setcc_r(dst, CC_Z, FLAG_Z); dst = setcc_r(dst, CC_S, FLAG_N); @@ -3937,7 +3945,7 @@ m68kinst instbuf; x86_68k_options * opts = context->options; uint8_t * dst = opts->cur_code; - uint8_t * dst_end = opts->code_end; + uint8_t * dst_end = opts->code_end; address &= 0xFFFFFF; if(get_native_address(opts->native_code_map, address)) { return dst; @@ -4065,7 +4073,7 @@ return orig_start; } } - + map_native_address(context, instbuf.address, dst, (after-inst)*2, MAX_NATIVE_SIZE); opts->cur_code = dst+MAX_NATIVE_SIZE; jmp(orig_start, dst); @@ -4112,12 +4120,12 @@ } bp_stub = dst; native = call(native, bp_stub); - + //Calculate length of prologue dst = check_cycles_int(dst, address, opts); int check_int_size = dst-bp_stub; dst = bp_stub; - + //Save context and call breakpoint handler dst = call(dst, (uint8_t *)m68k_save_context); dst = push_r(dst, SCRATCH1); @@ -4195,7 +4203,7 @@ ub_jcc = dst + 1; dst = jcc(dst, CC_NC, dst+2); } - + if (memmap[chunk].mask != 0xFFFFFF) { dst = and_ir(dst, memmap[chunk].mask, adr_reg, SZ_D); } @@ -4239,7 +4247,7 @@ dst = mov_rr(dst, RAX, SCRATCH1, size); } dst = jmp(dst, (uint8_t *)m68k_load_context); - + *not_null = dst - (not_null + 1); } if (size == SZ_B) { @@ -4248,7 +4256,7 @@ dst = add_rdisp8r(dst, CONTEXT, offsetof(m68k_context, mem_pointers) + sizeof(void*) * memmap[chunk].ptr_index, adr_reg, SZ_Q); if (is_write) { dst = mov_rrind(dst, SCRATCH1, SCRATCH2, size); - + } else { dst = mov_rindr(dst, SCRATCH1, SCRATCH1, size); } @@ -4377,14 +4385,14 @@ opts->code_end = opts->cur_code + size; opts->ram_inst_sizes = malloc(sizeof(uint8_t *) * 64); memset(opts->ram_inst_sizes, 0, sizeof(uint8_t *) * 64); - + opts->read_16 = gen_mem_fun(opts, memmap, num_chunks, READ_16); opts->read_8 = gen_mem_fun(opts, memmap, num_chunks, READ_8); opts->write_16 = gen_mem_fun(opts, memmap, num_chunks, WRITE_16); opts->write_8 = gen_mem_fun(opts, memmap, num_chunks, WRITE_8); - + uint8_t * dst = opts->cur_code; - + opts->read_32 = dst; dst = push_r(dst, SCRATCH1); dst = call(dst, opts->read_16); @@ -4398,7 +4406,7 @@ dst = shl_ir(dst, 16, SCRATCH2, SZ_D); dst = or_rr(dst, SCRATCH2, SCRATCH1, SZ_D); dst = retn(dst); - + opts->write_32_lowfirst = dst; dst = push_r(dst, SCRATCH2); dst = push_r(dst, SCRATCH1); @@ -4408,7 +4416,7 @@ dst = pop_r(dst, SCRATCH2); dst = shr_ir(dst, 16, SCRATCH1, SZ_D); dst = jmp(dst, opts->write_16); - + opts->write_32_highfirst = dst; dst = push_r(dst, SCRATCH1); dst = push_r(dst, SCRATCH2); @@ -4418,7 +4426,7 @@ dst = pop_r(dst, SCRATCH1); dst = add_ir(dst, 2, SCRATCH2, SZ_D); dst = jmp(dst, opts->write_16); - + opts->handle_cycle_limit_int = dst; dst = cmp_rdisp8r(dst, CONTEXT, offsetof(m68k_context, int_cycle), CYCLES, SZ_D); uint8_t * do_int = dst+1; @@ -4470,7 +4478,7 @@ //discard function return address dst = pop_r(dst, SCRATCH2); dst = jmp_r(dst, SCRATCH1); - + opts->trap = dst; dst = push_r(dst, SCRATCH2); //swap USP and SSP if not already in supervisor mode @@ -4499,7 +4507,7 @@ dst = call(dst, (uint8_t *)m68k_native_addr_and_sync); dst = cycles(dst, 18); dst = jmp_r(dst, SCRATCH1); - + opts->cur_code = dst; }