Applied mainline PCem commit e9268fe: the recompiler now only recompiles a block after seeing it twice. This improves performance on software that uses self-modifying code, e.g. Doom, Duke Nukem 3D, and Windows 95 when idle.

This commit is contained in:
OBattler
2016-07-31 20:22:14 +02:00
parent cd3fd7e726
commit d50a7e9449
4 changed files with 301 additions and 132 deletions

View File

@@ -1360,7 +1360,7 @@ void exec386_dynarec(int cycs)
}
}
if (valid_block)
if (valid_block && block->was_recompiled)
{
void (*code)() = (void *)&block->data[BLOCK_START];
@@ -1377,7 +1377,7 @@ inrecomp=0;
insc += codeblock_ins[index];*/
/* pclog("Exit block now %04X:%04X\n", CS, pc);*/
}
else if (!abrt)
else if (valid_block && !abrt)
{
uint32_t start_page = cpu_state.pc >> 12;
uint32_t start_pc = cpu_state.pc;
@@ -1388,7 +1388,7 @@ inrecomp=0;
cpu_new_blocks++;
codegen_block_init(phys_addr);
codegen_block_start_recompile(block);
codegen_in_recompile = 1;
// if (output) pclog("Recompile block at %04x:%04x %04x %04x %04x %04x %04x %04x ESP=%04x %04x %02x%02x:%02x%02x %02x%02x:%02x%02x %02x%02x:%02x%02x\n", CS, pc, AX, BX, CX, DX, SI, DI, ESP, BP, ram[0x116330+0x6df4+0xa+3], ram[0x116330+0x6df4+0xa+2], ram[0x116330+0x6df4+0xa+1], ram[0x116330+0x6df4+0xa+0], ram[0x11d136+3],ram[0x11d136+2],ram[0x11d136+1],ram[0x11d136+0], ram[(0x119abe)+0x3],ram[(0x119abe)+0x2],ram[(0x119abe)+0x1],ram[(0x119abe)+0x0]);
@@ -1450,13 +1450,89 @@ inrecomp=0;
insc++;
}
if (!abrt && !x86_was_reset)
codegen_block_end_recompile(block);
if (x86_was_reset)
codegen_reset();
codegen_in_recompile = 0;
// output &= ~2;
}
else if (!abrt)
{
/*Mark block but do not recompile*/
uint32_t start_page = cpu_state.pc >> 12;
uint32_t start_pc = cpu_state.pc;
// pclog("Hash %08x %i\n", codeblock_hash_pc[HASH(cs + pc)], codeblock_page_dirty[(cs + pc) >> 12]);
cpu_block_end = 0;
x86_was_reset = 0;
// cpu_new_blocks++;
codegen_block_init(phys_addr);
// if (output) pclog("Recompile block at %04x:%04x %04x %04x %04x %04x %04x %04x ESP=%04x %04x %02x%02x:%02x%02x %02x%02x:%02x%02x %02x%02x:%02x%02x\n", CS, pc, AX, BX, CX, DX, SI, DI, ESP, BP, ram[0x116330+0x6df4+0xa+3], ram[0x116330+0x6df4+0xa+2], ram[0x116330+0x6df4+0xa+1], ram[0x116330+0x6df4+0xa+0], ram[0x11d136+3],ram[0x11d136+2],ram[0x11d136+1],ram[0x11d136+0], ram[(0x119abe)+0x3],ram[(0x119abe)+0x2],ram[(0x119abe)+0x1],ram[(0x119abe)+0x0]);
while (!cpu_block_end)
{
oldcs=CS;
oldpc=cpu_state.pc;
oldcpl=CPL;
op32=use32;
ea_seg = &_ds;
ssegs = 0;
codegen_endpc = (cs + cpu_state.pc) + 8;
fetchdat = fastreadl(cs + cpu_state.pc);
if (!abrt)
{
trap = flags & T_FLAG;
opcode = fetchdat & 0xFF;
fetchdat >>= 8;
// if (output == 3)
// pclog("%04X(%06X):%04X : %08X %08X %08X %08X %04X %04X %04X(%08X) %04X %04X %04X(%08X) %08X %08X %08X SP=%04X:%08X %02X %04X %i %08X %08X %i %i %02X %02X %02X %02X %02X %08x %08x\n",CS,cs,pc,EAX,EBX,ECX,EDX,CS,DS,ES,es,FS,GS,SS,ss,EDI,ESI,EBP,SS,ESP,opcode,flags,ins,0, ldt.base, CPL, stack32, pic.pend, pic.mask, pic.mask2, pic2.pend, pic2.mask, cs+pc, pccache);
cpu_state.pc++;
x86_opcodes[(opcode | op32) & 0x3ff](fetchdat);
if (x86_was_reset)
break;
}
if (!use32) cpu_state.pc &= 0xffff;
/*Cap source code at 4000 bytes per block; this
will prevent any block from spanning more than
2 pages. In practice this limit will never be
hit, as host block size is only 2kB*/
if ((cpu_state.pc - start_pc) > 4000)
CPU_BLOCK_END();
if (trap)
CPU_BLOCK_END();
if (abrt)
{
codegen_block_remove();
CPU_BLOCK_END();
}
ins++;
insc++;
}
if (!abrt && !x86_was_reset)
codegen_block_end();
if (x86_was_reset)
codegen_reset();
codegen_in_recompile = 0;
// output &= ~2;
}
// if (output && (SP & 1))

View File

@@ -55,6 +55,8 @@ typedef struct codeblock_t
int ins;
uint64_t page_mask, page_mask2;
int was_recompiled;
uint8_t data[2048];
} codeblock_t;
@@ -233,6 +235,8 @@ void codegen_init();
void codegen_reset();
void codegen_block_init(uint32_t phys_addr);
void codegen_block_remove();
void codegen_block_start_recompile(codeblock_t *block);
void codegen_block_end_recompile(codeblock_t *block);
void codegen_generate_call(uint8_t opcode, OpFn op, uint32_t fetchdat, uint32_t new_pc, uint32_t old_pc);
void codegen_generate_seg_restore();
void codegen_check_abrt();
@@ -241,6 +245,7 @@ void codegen_flush();
void codegen_check_flush(struct page_t *page, uint64_t mask, uint32_t phys_addr);
extern int cpu_block_end;
extern uint32_t codegen_endpc;
extern int cpu_recomp_blocks, cpu_recomp_ins, cpu_recomp_full_ins, cpu_new_blocks;
extern int cpu_recomp_blocks_latched, cpu_recomp_ins_latched, cpu_recomp_full_ins_latched, cpu_new_blocks_latched;

View File

@@ -49,7 +49,7 @@ int cpu_recomp_evicted, cpu_recomp_evicted_latched;
int cpu_recomp_reuse, cpu_recomp_reuse_latched;
int cpu_recomp_removed, cpu_recomp_removed_latched;
static uint32_t codegen_endpc;
uint32_t codegen_endpc;
int codegen_block_cycles;
static int codegen_block_ins;
@@ -116,18 +116,54 @@ void dump_block()
pclog("dump_block done\n");
}
static void delete_block(codeblock_t *block)
/*
 * Link `block` at the head of the block list of the page it starts in
 * (pages[block->phys >> 12].block) and, when block->page_mask2 is non-zero,
 * also at the head of the block_2 list of the page addressed by
 * block->phys_2.
 *
 * Aborts via fatal() if block->page_mask is zero.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The hash-slot clearing, the "Deleting deleted block" check, the
 * `block->pc = 0` assignment and the codeblock_tree_delete() call below
 * appear to be leftover lines of the old delete_block() body rather than
 * part of this function - confirm against the real source file.
 */
static void add_to_block_list(codeblock_t *block)
{
// pclog("delete_block: pc=%08x\n", block->pc);
if (block == codeblock_hash[HASH(block->phys)])
codeblock_hash[HASH(block->phys)] = NULL;
/* Current head of the first page's list (NULL when the list is empty). */
codeblock_t *block_prev = pages[block->phys >> 12].block;
if (!block->pc)
fatal("Deleting deleted block\n");
block->pc = 0;
if (!block->page_mask)
fatal("add_to_block_list - mask = 0\n");
codeblock_tree_delete(block);
/* Push the block onto the front of the first page's list. */
if (block_prev)
{
block->next = block_prev;
block_prev->prev = block;
pages[block->phys >> 12].block = block;
}
else
{
block->next = NULL;
pages[block->phys >> 12].block = block;
}
/* Sanity check: the block we were linked in front of must have a non-zero pc. */
if (block->next)
{
if (!block->next->pc)
fatal("block->next->pc=0 %p %p %x %x\n", (void *)block->next, (void *)codeblock, block_current, block_pos);
}
/* A non-zero page_mask2 means the block is also tracked on a second page. */
if (block->page_mask2)
{
block_prev = pages[block->phys_2 >> 12].block_2;
if (block_prev)
{
block->next_2 = block_prev;
block_prev->prev_2 = block;
pages[block->phys_2 >> 12].block_2 = block;
}
else
{
block->next_2 = NULL;
pages[block->phys_2 >> 12].block_2 = block;
}
}
}
static void remove_from_block_list(codeblock_t *block, uint32_t pc)
{
if (!block->page_mask)
return;
if (block->prev)
{
block->prev->next = block->next;
@@ -166,6 +202,21 @@ static void delete_block(codeblock_t *block)
}
}
/*
 * Retire `block`: clear its hash slot if it currently owns it, mark it as
 * deleted by zeroing block->pc, remove it from the code block tree and
 * unlink it from the per-page block lists.
 *
 * Aborts via fatal() if block->pc is already zero on entry.
 */
static void delete_block(codeblock_t *block)
{
/* Saved because block->pc is zeroed before remove_from_block_list() is called. */
uint32_t old_pc = block->pc;
/* Only clear the hash slot when it still points at this block. */
if (block == codeblock_hash[HASH(block->phys)])
codeblock_hash[HASH(block->phys)] = NULL;
if (!block->pc)
fatal("Deleting deleted block\n");
block->pc = 0;
codeblock_tree_delete(block);
remove_from_block_list(block, old_pc);
}
void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr)
{
struct codeblock_t *block = page->block;
@@ -219,6 +270,7 @@ void codegen_block_init(uint32_t phys_addr)
}
block_num = HASH(phys_addr);
codeblock_hash[block_num] = &codeblock[block_current];
block->ins = 0;
block->pc = cs + cpu_state.pc;
block->_cs = cs;
@@ -227,7 +279,28 @@ void codegen_block_init(uint32_t phys_addr)
block->use32 = use32;
block->stack32 = stack32;
block->next = block->prev = NULL;
block->next_2 = block->prev_2 = NULL;
block->page_mask = 0;
block->was_recompiled = 0;
codeblock_tree_add(block);
}
void codegen_block_start_recompile(codeblock_t *block)
{
int has_evicted = 0;
page_t *page = &pages[block->phys >> 12];
if (!page->block)
mem_flush_write_page(block->phys, cs+cpu_state.pc);
block_num = HASH(block->phys);
block_current = block->pnt;
if (block->pc != cs + cpu_state.pc || block->was_recompiled)
fatal("Recompile to used block!\n");
block_pos = BLOCK_GPF_OFFSET;
#if WIN64
addbyte(0x48); /*XOR RCX, RCX*/
@@ -297,7 +370,7 @@ void codegen_block_init(uint32_t phys_addr)
codegen_block_ins = 0;
codegen_block_full_ins = 0;
recomp_page = phys_addr & ~0xfff;
recomp_page = block->phys & ~0xfff;
codegen_flags_changed = 0;
codegen_fpu_entered = 0;
@@ -310,52 +383,28 @@ void codegen_block_init(uint32_t phys_addr)
codegen_reg_loaded[0] = codegen_reg_loaded[1] = codegen_reg_loaded[2] = codegen_reg_loaded[3] =
codegen_reg_loaded[4] = codegen_reg_loaded[5] = codegen_reg_loaded[6] = codegen_reg_loaded[7] = 0;
block->was_recompiled = 1;
}
/*
 * Discard the block currently being built (codeblock[block_current]):
 * delete it, bump the cpu_recomp_removed counter and reset recomp_page
 * to -1.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The two assignments before delete_block() appear to be the pre-change
 * lines that the delete_block() call replaces. If both were live,
 * delete_block() would hit its "Deleting deleted block" fatal() because
 * block->pc is already 0 - confirm against the real source file.
 */
void codegen_block_remove()
{
codeblock_t *block = &codeblock[block_current];
//if ((block->phys & ~0xfff) == 0x119000) pclog("codegen_block_remove %08x\n", block->pc);
// if (block->pc == 0xb00b4ff5)
// pclog("Remove target block\n");
codeblock_hash[block_num] = NULL;
block->pc = 0;//xffffffff;
delete_block(block);
cpu_recomp_removed++;
// pclog("Remove block %i\n", block_num);
recomp_page = -1;
}
void codegen_block_end()
void codegen_block_generate_end_mask()
{
codeblock_t *block = &codeblock[block_current];
codeblock_t *block_prev = pages[block->phys >> 12].block;
uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff);
uint32_t end_pc = ((codegen_endpc + 3) & 0xffc) | (block->phys & ~0xfff);
block->endpc = codegen_endpc;
// if (block->pc == 0xb00b4ff5)
// pclog("End target block\n");
if (block_prev)
{
block->next = block_prev;
block_prev->prev = block;
pages[block->phys >> 12].block = block;
}
else
{
block->next = NULL;
pages[block->phys >> 12].block = block;
}
if (block->next)
{
// pclog(" next->pc=%08x\n", block->next->pc);
if (!block->next->pc)
fatal("block->next->pc=0 %p %p %x %x\n", (void *)block->next, (void *)codeblock, block_current, block_pos);
}
block->page_mask = 0;
start_pc = block->pc & 0xffc;
start_pc &= ~PAGE_MASK_MASK;
@@ -383,26 +432,7 @@ void codegen_block_end()
if (block->phys_2 != -1)
{
// pclog("start block - %08x %08x %p %p %p %08x\n", block->pc, block->endpc, (void *)block, (void *)block->next_2, (void *)pages[block->phys_2 >> 12].block_2, block->phys_2);
if (pages[block->phys_2 >> 12].block_2 == block)
fatal("Block same\n");
block_prev = pages[block->phys_2 >> 12].block_2;
if (block_prev)
{
block->next_2 = block_prev;
block_prev->prev_2 = block;
pages[block->phys_2 >> 12].block_2 = block;
// pclog(" pages.block_2=%p\n", (void *)block);
}
else
{
block->next_2 = NULL;
pages[block->phys_2 >> 12].block_2 = block;
// pclog(" pages.block_2=%p 2\n", (void *)block);
}
start_pc = 0;
end_pc = (block->endpc & 0xfff) >> PAGE_MASK_SHIFT;
for (; start_pc <= end_pc; start_pc++)
@@ -423,7 +453,19 @@ void codegen_block_end()
}
// pclog("block_end: %08x %08x %016llx\n", block->pc, block->endpc, block->page_mask);
recomp_page = -1;
}
/*
 * Finish the current block (codeblock[block_current]) without emitting host
 * code: generate its end/page masks, then link it into the per-page block
 * lists.
 */
void codegen_block_end()
{
codeblock_t *block = &codeblock[block_current];
/* Must run before add_to_block_list(), which fatal()s on a zero page_mask;
   presumably this call is what populates page_mask - confirm. */
codegen_block_generate_end_mask();
add_to_block_list(block);
}
void codegen_block_end_recompile(codeblock_t *block)
{
codegen_timing_block_end();
if (codegen_block_cycles)
@@ -472,11 +514,13 @@ void codegen_block_end()
if (block_pos > BLOCK_GPF_OFFSET)
fatal("Over limit!\n");
remove_from_block_list(block, block->pc);
block->next = block->prev = NULL;
block->next_2 = block->prev_2 = NULL;
codegen_block_generate_end_mask();
add_to_block_list(block);
// pclog("End block %i\n", block_num);
recomp_page = -1;
codeblock_tree_add(block);
}
void codegen_flush()
@@ -566,7 +610,7 @@ int opcode_0f_modrm[256] =
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*80*/
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*90*/
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, /*a0*/
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /*a0*/
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, /*b0*/
1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/

View File

@@ -49,7 +49,7 @@ int cpu_recomp_evicted, cpu_recomp_evicted_latched;
int cpu_recomp_reuse, cpu_recomp_reuse_latched;
int cpu_recomp_removed, cpu_recomp_removed_latched;
static uint32_t codegen_endpc;
uint32_t codegen_endpc;
int codegen_block_cycles;
static int codegen_block_ins;
@@ -116,17 +116,53 @@ void dump_block()
pclog("dump_block done\n");
}
static void delete_block(codeblock_t *block)
/*
 * Link `block` at the head of the block list of the page it starts in
 * (pages[block->phys >> 12].block) and, when block->page_mask2 is non-zero,
 * also at the head of the block_2 list of the page addressed by
 * block->phys_2.
 *
 * Aborts via fatal() if block->page_mask is zero.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The hash-slot clearing, the "Deleting deleted block" check, the
 * `block->pc = 0` assignment and the codeblock_tree_delete() call below
 * appear to be leftover lines of the old delete_block() body rather than
 * part of this function - confirm against the real source file.
 */
static void add_to_block_list(codeblock_t *block)
{
// pclog("delete_block: pc=%08x\n", block->pc);
if (block == codeblock_hash[HASH(block->phys)])
codeblock_hash[HASH(block->phys)] = NULL;
/* Current head of the first page's list (NULL when the list is empty). */
codeblock_t *block_prev = pages[block->phys >> 12].block;
if (!block->pc)
fatal("Deleting deleted block\n");
block->pc = 0;
if (!block->page_mask)
fatal("add_to_block_list - mask = 0\n");
codeblock_tree_delete(block);
/* Push the block onto the front of the first page's list. */
if (block_prev)
{
block->next = block_prev;
block_prev->prev = block;
pages[block->phys >> 12].block = block;
}
else
{
block->next = NULL;
pages[block->phys >> 12].block = block;
}
/* Sanity check: the block we were linked in front of must have a non-zero pc. */
if (block->next)
{
if (!block->next->pc)
fatal("block->next->pc=0 %p %p %x %x\n", (void *)block->next, (void *)codeblock, block_current, block_pos);
}
/* A non-zero page_mask2 means the block is also tracked on a second page. */
if (block->page_mask2)
{
block_prev = pages[block->phys_2 >> 12].block_2;
if (block_prev)
{
block->next_2 = block_prev;
block_prev->prev_2 = block;
pages[block->phys_2 >> 12].block_2 = block;
}
else
{
block->next_2 = NULL;
pages[block->phys_2 >> 12].block_2 = block;
}
}
}
static void remove_from_block_list(codeblock_t *block, uint32_t pc)
{
if (!block->page_mask)
return;
if (block->prev)
{
@@ -166,6 +202,21 @@ static void delete_block(codeblock_t *block)
}
}
/*
 * Retire `block`: clear its hash slot if it currently owns it, mark it as
 * deleted by zeroing block->pc, remove it from the code block tree and
 * unlink it from the per-page block lists.
 *
 * Aborts via fatal() if block->pc is already zero on entry.
 */
static void delete_block(codeblock_t *block)
{
/* Saved because block->pc is zeroed before remove_from_block_list() is called. */
uint32_t old_pc = block->pc;
/* Only clear the hash slot when it still points at this block. */
if (block == codeblock_hash[HASH(block->phys)])
codeblock_hash[HASH(block->phys)] = NULL;
if (!block->pc)
fatal("Deleting deleted block\n");
block->pc = 0;
codeblock_tree_delete(block);
remove_from_block_list(block, old_pc);
}
void codegen_check_flush(page_t *page, uint64_t mask, uint32_t phys_addr)
{
struct codeblock_t *block = page->block;
@@ -219,6 +270,7 @@ void codegen_block_init(uint32_t phys_addr)
}
block_num = HASH(phys_addr);
codeblock_hash[block_num] = &codeblock[block_current];
block->ins = 0;
block->pc = cs + cpu_state.pc;
block->_cs = cs;
@@ -227,7 +279,28 @@ void codegen_block_init(uint32_t phys_addr)
block->use32 = use32;
block->stack32 = stack32;
block->next = block->prev = NULL;
block->next_2 = block->prev_2 = NULL;
block->page_mask = 0;
block->was_recompiled = 0;
codeblock_tree_add(block);
}
void codegen_block_start_recompile(codeblock_t *block)
{
int has_evicted = 0;
page_t *page = &pages[block->phys >> 12];
if (!page->block)
mem_flush_write_page(block->phys, cs+cpu_state.pc);
block_num = HASH(block->phys);
block_current = block->pnt;
if (block->pc != cs + cpu_state.pc || block->was_recompiled)
fatal("Recompile to used block!\n");
block_pos = BLOCK_GPF_OFFSET;
addbyte(0xc7); /*MOV [ESP],0*/
addbyte(0x04);
@@ -273,7 +346,7 @@ void codegen_block_init(uint32_t phys_addr)
codegen_block_ins = 0;
codegen_block_full_ins = 0;
recomp_page = phys_addr & ~0xfff;
recomp_page = block->phys & ~0xfff;
codegen_flags_changed = 0;
codegen_fpu_entered = 0;
@@ -283,52 +356,28 @@ void codegen_block_init(uint32_t phys_addr)
codegen_fpu_loaded_iq[4] = codegen_fpu_loaded_iq[5] = codegen_fpu_loaded_iq[6] = codegen_fpu_loaded_iq[7] = 0;
_ds.checked = _es.checked = _fs.checked = _gs.checked = (cr0 & 1) ? 0 : 1;
block->was_recompiled = 1;
}
/*
 * Discard the block currently being built (codeblock[block_current]):
 * delete it, bump the cpu_recomp_removed counter and reset recomp_page
 * to -1.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers.
 * The two assignments before delete_block() appear to be the pre-change
 * lines that the delete_block() call replaces. If both were live,
 * delete_block() would hit its "Deleting deleted block" fatal() because
 * block->pc is already 0 - confirm against the real source file.
 */
void codegen_block_remove()
{
codeblock_t *block = &codeblock[block_current];
//if ((block->phys & ~0xfff) == 0x119000) pclog("codegen_block_remove %08x\n", block->pc);
// if (block->pc == 0xb00b4ff5)
// pclog("Remove target block\n");
codeblock_hash[block_num] = NULL;
block->pc = 0;//xffffffff;
delete_block(block);
cpu_recomp_removed++;
// pclog("Remove block %i\n", block_num);
recomp_page = -1;
}
void codegen_block_end()
void codegen_block_generate_end_mask()
{
codeblock_t *block = &codeblock[block_current];
codeblock_t *block_prev = pages[block->phys >> 12].block;
uint32_t start_pc = (block->pc & 0xffc) | (block->phys & ~0xfff);
uint32_t end_pc = ((codegen_endpc + 3) & 0xffc) | (block->phys & ~0xfff);
block->endpc = codegen_endpc;
// if (block->pc == 0xb00b4ff5)
// pclog("End target block\n");
if (block_prev)
{
block->next = block_prev;
block_prev->prev = block;
pages[block->phys >> 12].block = block;
}
else
{
block->next = NULL;
pages[block->phys >> 12].block = block;
}
if (block->next)
{
// pclog(" next->pc=%08x\n", block->next->pc);
if (!block->next->pc)
fatal("block->next->pc=0 %p %p %x %x\n", (void *)block->next, (void *)codeblock, block_current, block_pos);
}
block->page_mask = 0;
start_pc = block->pc & 0xffc;
start_pc &= ~PAGE_MASK_MASK;
@@ -356,26 +405,7 @@ void codegen_block_end()
if (block->phys_2 != -1)
{
// pclog("start block - %08x %08x %p %p %p %08x\n", block->pc, block->endpc, (void *)block, (void *)block->next_2, (void *)pages[block->phys_2 >> 12].block_2, block->phys_2);
if (pages[block->phys_2 >> 12].block_2 == block)
fatal("Block same\n");
block_prev = pages[block->phys_2 >> 12].block_2;
if (block_prev)
{
block->next_2 = block_prev;
block_prev->prev_2 = block;
pages[block->phys_2 >> 12].block_2 = block;
// pclog(" pages.block_2=%p\n", (void *)block);
}
else
{
block->next_2 = NULL;
pages[block->phys_2 >> 12].block_2 = block;
// pclog(" pages.block_2=%p 2\n", (void *)block);
}
start_pc = 0;
end_pc = (block->endpc & 0xfff) >> PAGE_MASK_SHIFT;
for (; start_pc <= end_pc; start_pc++)
@@ -396,7 +426,19 @@ void codegen_block_end()
}
// pclog("block_end: %08x %08x %016llx\n", block->pc, block->endpc, block->page_mask);
recomp_page = -1;
}
/*
 * Finish the current block (codeblock[block_current]) without emitting host
 * code: generate its end/page masks, then link it into the per-page block
 * lists.
 */
void codegen_block_end()
{
codeblock_t *block = &codeblock[block_current];
/* Must run before add_to_block_list(), which fatal()s on a zero page_mask;
   presumably this call is what populates page_mask - confirm. */
codegen_block_generate_end_mask();
add_to_block_list(block);
}
void codegen_block_end_recompile(codeblock_t *block)
{
codegen_timing_block_end();
if (codegen_block_cycles)
@@ -433,11 +475,13 @@ void codegen_block_end()
if (block_pos > BLOCK_GPF_OFFSET)
fatal("Over limit!\n");
// pclog("End block %i\n", block_num);
recomp_page = -1;
codeblock_tree_add(block);
remove_from_block_list(block, block->pc);
block->next = block->prev = NULL;
block->next_2 = block->prev_2 = NULL;
codegen_block_generate_end_mask();
add_to_block_list(block);
// pclog("End block %i\n", block_num);
}
void codegen_flush()
@@ -527,7 +571,7 @@ int opcode_0f_modrm[256] =
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*80*/
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*90*/
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, /*a0*/
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /*a0*/
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, /*b0*/
1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*c0*/