Path: blob/master/libmupen64plus/mupen64plus-core/src/r4300/x86/regcache.c
2 views
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *1* Mupen64plus - regcache.c *2* Mupen64Plus homepage: http://code.google.com/p/mupen64plus/ *3* Copyright (C) 2002 Hacktarux *4* *5* This program is free software; you can redistribute it and/or modify *6* it under the terms of the GNU General Public License as published by *7* the Free Software Foundation; either version 2 of the License, or *8* (at your option) any later version. *9* *10* This program is distributed in the hope that it will be useful, *11* but WITHOUT ANY WARRANTY; without even the implied warranty of *12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *13* GNU General Public License for more details. *14* *15* You should have received a copy of the GNU General Public License *16* along with this program; if not, write to the *17* Free Software Foundation, Inc., *18* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *19* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */2021#include <stdio.h>2223#include "regcache.h"2425#include "r4300/recomp.h"26#include "r4300/r4300.h"27#include "r4300/recomph.h"2829static unsigned int* reg_content[8];30static precomp_instr* last_access[8];31static precomp_instr* free_since[8];32static int dirty[8];33static int r64[8];34static unsigned int* r0;3536void init_cache(precomp_instr* start)37{38int i;39for (i=0; i<8; i++)40{41last_access[i] = NULL;42free_since[i] = start;43}44r0 = (unsigned int*)reg;45}4647void free_all_registers(void)48{49#if defined(PROFILE_R4300)50int freestart = code_length;51int flushed = 0;52#endif5354int i;55for (i=0; i<8; i++)56{57#if defined(PROFILE_R4300)58if (last_access[i] && dirty[i]) flushed = 1;59#endif60if (last_access[i]) free_register(i);61else62{63while (free_since[i] <= dst)64{65free_since[i]->reg_cache_infos.needed_registers[i] = NULL;66free_since[i]++;67}68}69}7071#if defined(PROFILE_R4300)72if (flushed == 1)73{74long x86addr = (long) ((*inst_pointer) + freestart);75int mipsop = -5;76fwrite(&mipsop, 1, 4, pfProfile); /* -5 = regcache flushing */77fwrite(&x86addr, 1, sizeof(char *), pfProfile); // write pointer to start of register cache flushing instructions78x86addr = (long) ((*inst_pointer) + code_length);79fwrite(&src, 1, 4, pfProfile); // write 4-byte MIPS opcode for current instruction80fwrite(&x86addr, 1, sizeof(char *), pfProfile); // write pointer to dynamically generated x86 code for this MIPS instruction81}82#endif83}8485// this function frees a specific X86 GPR86void free_register(int reg)87{88precomp_instr *last;8990if (last_access[reg] != NULL &&91r64[reg] != -1 && (int)reg_content[reg] != (int)reg_content[r64[reg]]-4)92{93free_register(r64[reg]);94return;95}9697if (last_access[reg] != NULL) last = last_access[reg]+1;98else last = free_since[reg];99100while (last <= dst)101{102if (last_access[reg] != NULL && dirty[reg])103last->reg_cache_infos.needed_registers[reg] = reg_content[reg];104else105last->reg_cache_infos.needed_registers[reg] = NULL;106107if (last_access[reg] != NULL && r64[reg] != -1)108{109if (dirty[r64[reg]])110last->reg_cache_infos.needed_registers[r64[reg]] = reg_content[r64[reg]];111else112last->reg_cache_infos.needed_registers[r64[reg]] = NULL;113}114115last++;116}117if (last_access[reg] == NULL)118{119free_since[reg] = dst+1;120return;121}122123if (dirty[reg])124{125mov_m32_reg32(reg_content[reg], reg);126if (r64[reg] == -1)127{128sar_reg32_imm8(reg, 31);129mov_m32_reg32((unsigned int*)reg_content[reg]+1, reg);130}131else mov_m32_reg32(reg_content[r64[reg]], r64[reg]);132}133last_access[reg] = NULL;134free_since[reg] = dst+1;135if (r64[reg] != -1)136{137last_access[r64[reg]] = NULL;138free_since[r64[reg]] = dst+1;139}140}141142int lru_register(void)143{144unsigned int oldest_access = 0xFFFFFFFF;145int i, reg = 0;146for (i=0; i<8; i++)147{148if (i != ESP && (unsigned int)last_access[i] < oldest_access)149{150oldest_access = (int)last_access[i];151reg = i;152}153}154return reg;155}156157int lru_register_exc1(int exc1)158{159unsigned int oldest_access = 0xFFFFFFFF;160int i, reg = 0;161for (i=0; i<8; i++)162{163if (i != ESP && i != exc1 && (unsigned int)last_access[i] < oldest_access)164{165oldest_access = (int)last_access[i];166reg = i;167}168}169return reg;170}171172// this function finds a register to put the data contained in addr,173// if there was another value before it's cleanly removed of the174// register cache. After that, the register number is returned.175// If data are already cached, the function only returns the register number176int allocate_register(unsigned int *addr)177{178unsigned int oldest_access = 0xFFFFFFFF;179int reg = 0, i;180181// is it already cached ?182if (addr != NULL)183{184for (i=0; i<8; i++)185{186if (last_access[i] != NULL && reg_content[i] == addr)187{188precomp_instr *last = last_access[i]+1;189190while (last <= dst)191{192last->reg_cache_infos.needed_registers[i] = reg_content[i];193last++;194}195last_access[i] = dst;196if (r64[i] != -1)197{198last = last_access[r64[i]]+1;199200while (last <= dst)201{202last->reg_cache_infos.needed_registers[r64[i]] = reg_content[r64[i]];203last++;204}205last_access[r64[i]] = dst;206}207208return i;209}210}211}212213// if it's not cached, we take the least recently used register214for (i=0; i<8; i++)215{216if (i != ESP && (unsigned int)last_access[i] < oldest_access)217{218oldest_access = (int)last_access[i];219reg = i;220}221}222223if (last_access[reg]) free_register(reg);224else225{226while (free_since[reg] <= dst)227{228free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL;229free_since[reg]++;230}231}232233last_access[reg] = dst;234reg_content[reg] = addr;235dirty[reg] = 0;236r64[reg] = -1;237238if (addr != NULL)239{240if (addr == r0 || addr == r0+1)241xor_reg32_reg32(reg, reg);242else243mov_reg32_m32(reg, addr);244}245246return reg;247}248249// this function is similar to allocate_register except it loads250// a 64 bits value, and return the register number of the LSB part251int allocate_64_register1(unsigned int *addr)252{253int reg1, reg2, i;254255// is it already cached as a 32 bits value ?256for (i=0; i<8; i++)257{258if (last_access[i] != NULL && reg_content[i] == addr)259{260if (r64[i] == -1)261{262allocate_register(addr);263reg2 = allocate_register(dirty[i] ? NULL : addr+1);264r64[i] = reg2;265r64[reg2] = i;266267if (dirty[i])268{269reg_content[reg2] = addr+1;270dirty[reg2] = 1;271mov_reg32_reg32(reg2, i);272sar_reg32_imm8(reg2, 31);273}274275return i;276}277}278}279280reg1 = allocate_register(addr);281reg2 = allocate_register(addr+1);282r64[reg1] = reg2;283r64[reg2] = reg1;284285return reg1;286}287288// this function is similar to allocate_register except it loads289// a 64 bits value, and return the register number of the MSB part290int allocate_64_register2(unsigned int *addr)291{292int reg1, reg2, i;293294// is it already cached as a 32 bits value ?295for (i=0; i<8; i++)296{297if (last_access[i] != NULL && reg_content[i] == addr)298{299if (r64[i] == -1)300{301allocate_register(addr);302reg2 = allocate_register(dirty[i] ? NULL : addr+1);303r64[i] = reg2;304r64[reg2] = i;305306if (dirty[i])307{308reg_content[reg2] = addr+1;309dirty[reg2] = 1;310mov_reg32_reg32(reg2, i);311sar_reg32_imm8(reg2, 31);312}313314return reg2;315}316}317}318319reg1 = allocate_register(addr);320reg2 = allocate_register(addr+1);321r64[reg1] = reg2;322r64[reg2] = reg1;323324return reg2;325}326327// this function checks if the data located at addr are cached in a register328// and then, it returns 1 if it's a 64 bit value329// 0 if it's a 32 bit value330// -1 if it's not cached331int is64(unsigned int *addr)332{333int i;334for (i=0; i<8; i++)335{336if (last_access[i] != NULL && reg_content[i] == addr)337{338if (r64[i] == -1) return 0;339return 1;340}341}342return -1;343}344345int allocate_register_w(unsigned int *addr)346{347unsigned int oldest_access = 0xFFFFFFFF;348int reg = 0, i;349350// is it already cached ?351for (i=0; i<8; i++)352{353if (last_access[i] != NULL && reg_content[i] == addr)354{355precomp_instr *last = last_access[i]+1;356357while (last <= dst)358{359last->reg_cache_infos.needed_registers[i] = NULL;360last++;361}362last_access[i] = dst;363dirty[i] = 1;364if (r64[i] != -1)365{366last = last_access[r64[i]]+1;367while (last <= dst)368{369last->reg_cache_infos.needed_registers[r64[i]] = NULL;370last++;371}372free_since[r64[i]] = dst+1;373last_access[r64[i]] = NULL;374r64[i] = -1;375}376377return i;378}379}380381// if it's not cached, we take the least recently used register382for (i=0; i<8; i++)383{384if (i != ESP && (unsigned int)last_access[i] < oldest_access)385{386oldest_access = (int)last_access[i];387reg = i;388}389}390391if (last_access[reg]) free_register(reg);392else393{394while (free_since[reg] <= dst)395{396free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL;397free_since[reg]++;398}399}400401last_access[reg] = dst;402reg_content[reg] = addr;403dirty[reg] = 1;404r64[reg] = -1;405406return reg;407}408409int allocate_64_register1_w(unsigned int *addr)410{411int reg1, reg2, i;412413// is it already cached as a 32 bits value ?414for (i=0; i<8; i++)415{416if (last_access[i] != NULL && reg_content[i] == addr)417{418if (r64[i] == -1)419{420allocate_register_w(addr);421reg2 = lru_register();422if (last_access[reg2]) free_register(reg2);423else424{425while (free_since[reg2] <= dst)426{427free_since[reg2]->reg_cache_infos.needed_registers[reg2] = NULL;428free_since[reg2]++;429}430}431r64[i] = reg2;432r64[reg2] = i;433last_access[reg2] = dst;434435reg_content[reg2] = addr+1;436dirty[reg2] = 1;437mov_reg32_reg32(reg2, i);438sar_reg32_imm8(reg2, 31);439440return i;441}442else443{444last_access[i] = dst;445last_access[r64[i]] = dst;446dirty[i] = dirty[r64[i]] = 1;447return i;448}449}450}451452reg1 = allocate_register_w(addr);453reg2 = lru_register();454if (last_access[reg2]) free_register(reg2);455else456{457while (free_since[reg2] <= dst)458{459free_since[reg2]->reg_cache_infos.needed_registers[reg2] = NULL;460free_since[reg2]++;461}462}463r64[reg1] = reg2;464r64[reg2] = reg1;465last_access[reg2] = dst;466reg_content[reg2] = addr+1;467dirty[reg2] = 1;468469return reg1;470}471472int allocate_64_register2_w(unsigned int *addr)473{474int reg1, reg2, i;475476// is it already cached as a 32 bits value ?477for (i=0; i<8; i++)478{479if (last_access[i] != NULL && reg_content[i] == addr)480{481if (r64[i] == -1)482{483allocate_register_w(addr);484reg2 = lru_register();485if (last_access[reg2]) free_register(reg2);486else487{488while (free_since[reg2] <= dst)489{490free_since[reg2]->reg_cache_infos.needed_registers[reg2] = NULL;491free_since[reg2]++;492}493}494r64[i] = reg2;495r64[reg2] = i;496last_access[reg2] = dst;497498reg_content[reg2] = addr+1;499dirty[reg2] = 1;500mov_reg32_reg32(reg2, i);501sar_reg32_imm8(reg2, 31);502503return reg2;504}505else506{507last_access[i] = dst;508last_access[r64[i]] = dst;509dirty[i] = dirty[r64[i]] = 1;510return r64[i];511}512}513}514515reg1 = allocate_register_w(addr);516reg2 = lru_register();517if (last_access[reg2]) free_register(reg2);518else519{520while (free_since[reg2] <= dst)521{522free_since[reg2]->reg_cache_infos.needed_registers[reg2] = NULL;523free_since[reg2]++;524}525}526r64[reg1] = reg2;527r64[reg2] = reg1;528last_access[reg2] = dst;529reg_content[reg2] = addr+1;530dirty[reg2] = 1;531532return reg2;533}534535void set_register_state(int reg, unsigned int *addr, int d)536{537last_access[reg] = dst;538reg_content[reg] = addr;539r64[reg] = -1;540dirty[reg] = d;541}542543void set_64_register_state(int reg1, int reg2, unsigned int *addr, int d)544{545last_access[reg1] = dst;546last_access[reg2] = dst;547reg_content[reg1] = addr;548reg_content[reg2] = addr+1;549r64[reg1] = reg2;550r64[reg2] = reg1;551dirty[reg1] = d;552dirty[reg2] = d;553}554555void force_32(int reg)556{557if (r64[reg] != -1)558{559precomp_instr *last = last_access[reg]+1;560561while (last <= dst)562{563if (dirty[reg])564last->reg_cache_infos.needed_registers[reg] = reg_content[reg];565else566last->reg_cache_infos.needed_registers[reg] = NULL;567568if (dirty[r64[reg]])569last->reg_cache_infos.needed_registers[r64[reg]] = reg_content[r64[reg]];570else571last->reg_cache_infos.needed_registers[r64[reg]] = NULL;572573last++;574}575576if (dirty[reg])577{578mov_m32_reg32(reg_content[reg], reg);579mov_m32_reg32(reg_content[r64[reg]], r64[reg]);580dirty[reg] = 0;581}582last_access[r64[reg]] = NULL;583free_since[r64[reg]] = dst+1;584r64[reg] = -1;585}586}587588void allocate_register_manually(int reg, unsigned int *addr)589{590int i;591592if (last_access[reg] != NULL && reg_content[reg] == addr)593{594precomp_instr *last = last_access[reg]+1;595596while (last <= dst)597{598last->reg_cache_infos.needed_registers[reg] = reg_content[reg];599last++;600}601last_access[reg] = dst;602if (r64[reg] != -1)603{604last = last_access[r64[reg]]+1;605606while (last <= dst)607{608last->reg_cache_infos.needed_registers[r64[reg]] = reg_content[r64[reg]];609last++;610}611last_access[r64[reg]] = dst;612}613return;614}615616if (last_access[reg]) free_register(reg);617else618{619while (free_since[reg] <= dst)620{621free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL;622free_since[reg]++;623}624}625626// is it already cached ?627for (i=0; i<8; i++)628{629if (last_access[i] != NULL && reg_content[i] == addr)630{631precomp_instr *last = last_access[i]+1;632633while (last <= dst)634{635last->reg_cache_infos.needed_registers[i] = reg_content[i];636last++;637}638last_access[i] = dst;639if (r64[i] != -1)640{641last = last_access[r64[i]]+1;642643while (last <= dst)644{645last->reg_cache_infos.needed_registers[r64[i]] = reg_content[r64[i]];646last++;647}648last_access[r64[i]] = dst;649}650651mov_reg32_reg32(reg, i);652last_access[reg] = dst;653r64[reg] = r64[i];654if (r64[reg] != -1) r64[r64[reg]] = reg;655dirty[reg] = dirty[i];656reg_content[reg] = reg_content[i];657free_since[i] = dst+1;658last_access[i] = NULL;659660return;661}662}663664last_access[reg] = dst;665reg_content[reg] = addr;666dirty[reg] = 0;667r64[reg] = -1;668669if (addr != NULL)670{671if (addr == r0 || addr == r0+1)672xor_reg32_reg32(reg, reg);673else674mov_reg32_m32(reg, addr);675}676}677678void allocate_register_manually_w(int reg, unsigned int *addr, int load)679{680int i;681682if (last_access[reg] != NULL && reg_content[reg] == addr)683{684precomp_instr *last = last_access[reg]+1;685686while (last <= dst)687{688last->reg_cache_infos.needed_registers[reg] = reg_content[reg];689last++;690}691last_access[reg] = dst;692693if (r64[reg] != -1)694{695last = last_access[r64[reg]]+1;696697while (last <= dst)698{699last->reg_cache_infos.needed_registers[r64[reg]] = reg_content[r64[reg]];700last++;701}702last_access[r64[reg]] = NULL;703free_since[r64[reg]] = dst+1;704r64[reg] = -1;705}706dirty[reg] = 1;707return;708}709710if (last_access[reg]) free_register(reg);711else712{713while (free_since[reg] <= dst)714{715free_since[reg]->reg_cache_infos.needed_registers[reg] = NULL;716free_since[reg]++;717}718}719720// is it already cached ?721for (i=0; i<8; i++)722{723if (last_access[i] != NULL && reg_content[i] == addr)724{725precomp_instr *last = last_access[i]+1;726727while (last <= dst)728{729last->reg_cache_infos.needed_registers[i] = reg_content[i];730last++;731}732last_access[i] = dst;733if (r64[i] != -1)734{735last = last_access[r64[i]]+1;736while (last <= dst)737{738last->reg_cache_infos.needed_registers[r64[i]] = NULL;739last++;740}741free_since[r64[i]] = dst+1;742last_access[r64[i]] = NULL;743r64[i] = -1;744}745746if (load)747mov_reg32_reg32(reg, i);748last_access[reg] = dst;749dirty[reg] = 1;750r64[reg] = -1;751reg_content[reg] = reg_content[i];752free_since[i] = dst+1;753last_access[i] = NULL;754755return;756}757}758759last_access[reg] = dst;760reg_content[reg] = addr;761dirty[reg] = 1;762r64[reg] = -1;763764if (addr != NULL && load)765{766if (addr == r0 || addr == r0+1)767xor_reg32_reg32(reg, reg);768else769mov_reg32_m32(reg, addr);770}771}772773// 0x81 0xEC 0x4 0x0 0x0 0x0 sub esp, 4774// 0xA1 0xXXXXXXXX mov eax, XXXXXXXX (&code start)775// 0x05 0xXXXXXXXX add eax, XXXXXXXX (local_addr)776// 0x89 0x04 0x24 mov [esp], eax777// 0x8B (reg<<3)|5 0xXXXXXXXX mov eax, [XXXXXXXX]778// 0x8B (reg<<3)|5 0xXXXXXXXX mov ebx, [XXXXXXXX]779// 0x8B (reg<<3)|5 0xXXXXXXXX mov ecx, [XXXXXXXX]780// 0x8B (reg<<3)|5 0xXXXXXXXX mov edx, [XXXXXXXX]781// 0x8B (reg<<3)|5 0xXXXXXXXX mov ebp, [XXXXXXXX]782// 0x8B (reg<<3)|5 0xXXXXXXXX mov esi, [XXXXXXXX]783// 0x8B (reg<<3)|5 0xXXXXXXXX mov edi, [XXXXXXXX]784// 0xC3 ret785// total : 62 bytes786static void build_wrapper(precomp_instr *instr, unsigned char* code, precomp_block* block)787{788int i;789int j=0;790791#if defined(PROFILE_R4300)792long x86addr = (long) code;793int mipsop = -4;794fwrite(&mipsop, 1, 4, pfProfile); // write 4-byte MIPS opcode795fwrite(&x86addr, 1, sizeof(char *), pfProfile); // write pointer to dynamically generated x86 code for this MIPS instruction796#endif797798code[j++] = 0x81;799code[j++] = 0xEC;800code[j++] = 0x04;801code[j++] = 0x00;802code[j++] = 0x00;803code[j++] = 0x00;804805code[j++] = 0xA1;806*((unsigned int*)&code[j]) = (unsigned int)(&block->code);807j+=4;808809code[j++] = 0x05;810*((unsigned int*)&code[j]) = (unsigned int)instr->local_addr;811j+=4;812813code[j++] = 0x89;814code[j++] = 0x04;815code[j++] = 0x24;816817for (i=0; i<8; i++)818{819if (instr->reg_cache_infos.needed_registers[i] != NULL)820{821code[j++] = 0x8B;822code[j++] = (i << 3) | 5;823*((unsigned int*)&code[j]) =824(unsigned int)instr->reg_cache_infos.needed_registers[i];825j+=4;826}827}828829code[j++] = 0xC3;830}831832void build_wrappers(precomp_instr *instr, int start, int end, precomp_block* block)833{834int i, reg;;835for (i=start; i<end; i++)836{837instr[i].reg_cache_infos.need_map = 0;838for (reg=0; reg<8; reg++)839{840if (instr[i].reg_cache_infos.needed_registers[reg] != NULL)841{842instr[i].reg_cache_infos.need_map = 1;843build_wrapper(&instr[i], instr[i].reg_cache_infos.jump_wrapper, block);844break;845}846}847}848}849850void simplify_access(void)851{852int i;853dst->local_addr = code_length;854for(i=0; i<8; i++) dst->reg_cache_infos.needed_registers[i] = NULL;855}856857858859