CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/MIPSIntVFPU.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617// TODO: Test and maybe fix: https://code.google.com/p/jpcsp/source/detail?r=3082#1819#include <cmath>20#include <limits>21#include <algorithm>2223#include "Common/Data/Convert/SmallDataConvert.h"24#include "Common/Math/math_util.h"2526#include "Core/Compatibility.h"27#include "Core/Core.h"28#include "Core/MemMap.h"29#include "Core/Reporting.h"30#include "Core/System.h"3132#include "Core/MIPS/MIPS.h"33#include "Core/MIPS/MIPSInt.h"34#include "Core/MIPS/MIPSTables.h"35#include "Core/MIPS/MIPSVFPUUtils.h"3637#define R(i) (currentMIPS->r[i])38#define V(i) (currentMIPS->v[voffset[i]])39#define VI(i) (currentMIPS->vi[voffset[i]])40#define FI(i) (currentMIPS->fi[i])41#define FsI(i) (currentMIPS->fs[i])42#define PC (currentMIPS->pc)4344#define _RS ((op>>21) & 0x1F)45#define _RT ((op>>16) & 0x1F)46#define _RD ((op>>11) & 0x1F)47#define _FS ((op>>11) & 0x1F)48#define _FT ((op>>16) & 0x1F)49#define _FD ((op>>6 ) & 0x1F)50#define _POS ((op>>6 ) & 0x1F)51#define _SIZE ((op>>11) & 0x1F)5253#define HI currentMIPS->hi54#define LO currentMIPS->lo5556#ifndef M_LOG2E57#define M_E 2.71828182845904523536f58#define M_LOG2E 1.44269504088896340736f59#define M_LOG10E 0.434294481903251827651f60#define M_LN2 0.693147180559945309417f61#define M_LN10 2.30258509299404568402f62#undef M_PI63#define M_PI 3.14159265358979323846f64#ifndef M_PI_265#define M_PI_2 1.57079632679489661923f66#endif67#define M_PI_4 0.785398163397448309616f68#define M_1_PI 0.318309886183790671538f69#define M_2_PI 0.636619772367581343076f70#define M_2_SQRTPI 1.12837916709551257390f71#define M_SQRT2 1.41421356237309504880f72#define M_SQRT1_2 0.707106781186547524401f73#endif7475static const bool USE_VFPU_DOT = false;76static const bool USE_VFPU_SQRT = false;7778union FloatBits {79float f[4];80u32 u[4];81int i[4];82};8384// Preserves NaN in first param, takes sign of equal second param.85// Technically, std::max may do this but it's undefined.86inline float nanmax(float f, float cst)87{88return f <= cst ? cst : f;89}9091// Preserves NaN in first param, takes sign of equal second param.92inline float nanmin(float f, float cst)93{94return f >= cst ? cst : f;95}9697// Preserves NaN in first param, takes sign of equal value in others.98inline float nanclamp(float f, float lower, float upper)99{100return nanmin(nanmax(f, lower), upper);101}102103static void ApplyPrefixST(float *r, u32 data, VectorSize size, float invalid = 0.0f) {104// Check for no prefix.105if (data == 0xe4)106return;107108int n = GetNumVectorElements(size);109float origV[4]{ invalid, invalid, invalid, invalid };110static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};111112for (int i = 0; i < n; i++) {113origV[i] = r[i];114}115116for (int i = 0; i < n; i++) {117int regnum = (data >> (i*2)) & 3;118int abs = (data >> (8+i)) & 1;119int negate = (data >> (16+i)) & 1;120int constants = (data >> (12+i)) & 1;121122if (!constants) {123if (regnum >= n) {124// We mostly handle this now, but still worth reporting.125ERROR_LOG_REPORT(Log::CPU, "Invalid VFPU swizzle: %08x: %i / %d at PC = %08x (%s)", data, regnum, n, currentMIPS->pc, MIPSDisasmAt(currentMIPS->pc).c_str());126}127r[i] = origV[regnum];128if (abs)129((u32 *)r)[i] = ((u32 *)r)[i] & 0x7FFFFFFF;130} else {131r[i] = constantArray[regnum + (abs<<2)];132}133134if (negate)135((u32 *)r)[i] = ((u32 *)r)[i] ^ 0x80000000;136}137}138139inline void ApplySwizzleS(float *v, VectorSize size, float invalid = 0.0f)140{141ApplyPrefixST(v, currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX], size, invalid);142}143144inline void ApplySwizzleT(float *v, VectorSize size, float invalid = 0.0f)145{146ApplyPrefixST(v, currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX], size, invalid);147}148149void ApplyPrefixD(float *v, VectorSize size, bool onlyWriteMask = false)150{151u32 data = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];152if (!data || onlyWriteMask)153return;154int n = GetNumVectorElements(size);155for (int i = 0; i < n; i++)156{157int sat = (data >> (i * 2)) & 3;158if (sat == 1)159v[i] = vfpu_clamp(v[i], 0.0f, 1.0f);160else if (sat == 3)161v[i] = vfpu_clamp(v[i], -1.0f, 1.0f);162}163}164165static void RetainInvalidSwizzleST(float *d, VectorSize sz) {166// Somehow it's like a supernan, maybe wires through to zero?167// Doesn't apply to all ops.168int sPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];169int tPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];170int n = GetNumVectorElements(sz);171172// TODO: We can probably do some faster check of sPrefix and tPrefix to skip over this loop.173for (int i = 0; i < n; i++) {174int swizzleS = (sPrefix >> (i + i)) & 3;175int swizzleT = (tPrefix >> (i + i)) & 3;176int constS = (sPrefix >> (12 + i)) & 1;177int constT = (tPrefix >> (12 + i)) & 1;178if ((swizzleS >= n && !constS) || (swizzleT >= n && !constT))179d[i] = 0.0f;180}181}182183void EatPrefixes()184{185currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] = 0xe4; // passthru186currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX] = 0xe4; // passthru187currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = 0;188}189190namespace MIPSInt191{192void Int_VPFX(MIPSOpcode op)193{194int data = op & 0x000FFFFF;195int regnum = (op >> 24) & 3;196if (regnum == VFPU_CTRL_DPREFIX)197data &= 0x00000FFF;198currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX + regnum] = data;199PC += 4;200}201202void Int_SVQ(MIPSOpcode op)203{204int imm = SignExtend16ToS32(op & 0xFFFC);205int rs = _RS;206int vt = (((op >> 16) & 0x1f)) | ((op&1) << 5);207208u32 addr = R(rs) + imm;209float *f;210const float *cf;211212switch (op >> 26)213{214case 53: //lvl.q/lvr.q215{216if (addr & 0x3)217{218_dbg_assert_msg_( 0, "Misaligned lvX.q at %08x (pc = %08x)", addr, PC);219}220float d[4];221ReadVector(d, V_Quad, vt);222int offset = (addr >> 2) & 3;223if ((op & 2) == 0)224{225// It's an LVL226for (int i = 0; i < offset + 1; i++)227{228d[3 - i] = Memory::Read_Float(addr - 4 * i);229}230}231else232{233// It's an LVR234for (int i = 0; i < (3 - offset) + 1; i++)235{236d[i] = Memory::Read_Float(addr + 4 * i);237}238}239WriteVector(d, V_Quad, vt);240}241break;242243case 54: //lv.q244if (addr & 0xF)245{246_dbg_assert_msg_( 0, "Misaligned lv.q at %08x (pc = %08x)", addr, PC);247}248#ifndef COMMON_BIG_ENDIAN249cf = reinterpret_cast<const float *>(Memory::GetPointerRange(addr, 16));250if (cf)251WriteVector(cf, V_Quad, vt);252#else253float lvqd[4];254255lvqd[0] = Memory::Read_Float(addr);256lvqd[1] = Memory::Read_Float(addr + 4);257lvqd[2] = Memory::Read_Float(addr + 8);258lvqd[3] = Memory::Read_Float(addr + 12);259260WriteVector(lvqd, V_Quad, vt);261#endif262break;263264case 61: // svl.q/svr.q265{266if (addr & 0x3)267{268_dbg_assert_msg_( 0, "Misaligned svX.q at %08x (pc = %08x)", addr, PC);269}270float d[4];271ReadVector(d, V_Quad, vt);272int offset = (addr >> 2) & 3;273if ((op&2) == 0)274{275// It's an SVL276for (int i = 0; i < offset + 1; i++)277{278Memory::Write_Float(d[3 - i], addr - i * 4);279}280}281else282{283// It's an SVR284for (int i = 0; i < (3 - offset) + 1; i++)285{286Memory::Write_Float(d[i], addr + 4 * i);287}288}289break;290}291292case 62: //sv.q293if (addr & 0xF)294{295_dbg_assert_msg_( 0, "Misaligned sv.q at %08x (pc = %08x)", addr, PC);296}297#ifndef COMMON_BIG_ENDIAN298f = reinterpret_cast<float *>(Memory::GetPointerWriteRange(addr, 16));299if (f)300ReadVector(f, V_Quad, vt);301#else302float svqd[4];303ReadVector(svqd, V_Quad, vt);304305Memory::Write_Float(svqd[0], addr);306Memory::Write_Float(svqd[1], addr + 4);307Memory::Write_Float(svqd[2], addr + 8);308Memory::Write_Float(svqd[3], addr + 12);309#endif310break;311312default:313_dbg_assert_msg_(false,"Trying to interpret VQ instruction that can't be interpreted");314break;315}316PC += 4;317}318319void Int_VMatrixInit(MIPSOpcode op) {320static const float idt[16] = {3211,0,0,0,3220,1,0,0,3230,0,1,0,3240,0,0,1,325};326static const float zero[16] = {3270,0,0,0,3280,0,0,0,3290,0,0,0,3300,0,0,0,331};332static const float one[16] = {3331,1,1,1,3341,1,1,1,3351,1,1,1,3361,1,1,1,337};338int vd = _VD;339MatrixSize sz = GetMtxSize(op);340const float *m;341342switch ((op >> 16) & 0xF) {343case 3: m=idt; break; //identity // vmidt344case 6: m=zero; break; // vmzero345case 7: m=one; break; // vmone346default:347_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");348PC += 4;349EatPrefixes();350return;351}352353// The S prefix generates constants, but only for the final (possibly transposed) row.354if (currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] & 0xF0F00) {355float prefixed[16];356memcpy(prefixed, m, sizeof(prefixed));357358int off = GetMatrixSide(sz) - 1;359u32 sprefixRemove = VFPU_ANY_SWIZZLE();360u32 sprefixAdd = 0;361switch ((op >> 16) & 0xF) {362case 3:363{364VFPUConst constX = off == 0 ? VFPUConst::ONE : VFPUConst::ZERO;365VFPUConst constY = off == 1 ? VFPUConst::ONE : VFPUConst::ZERO;366VFPUConst constZ = off == 2 ? VFPUConst::ONE : VFPUConst::ZERO;367VFPUConst constW = off == 3 ? VFPUConst::ONE : VFPUConst::ZERO;368sprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);369break;370}371case 6:372sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);373break;374case 7:375sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);376break;377default:378_dbg_assert_msg_( 0, "Unknown matrix init op");379break;380}381ApplyPrefixST(&prefixed[off * 4], VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Quad);382WriteMatrix(prefixed, sz, vd);383} else {384// Write mask applies to the final (maybe transposed) row. Sat causes hang.385WriteMatrix(m, sz, vd);386}387PC += 4;388EatPrefixes();389}390391void Int_VVectorInit(MIPSOpcode op)392{393int vd = _VD;394VectorSize sz = GetVecSize(op);395float d[4];396397VFPUConst constant = VFPUConst::ZERO;398switch ((op >> 16) & 0xF) {399case 6: constant = VFPUConst::ZERO; break; //vzero400case 7: constant = VFPUConst::ONE; break; //vone401default:402_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");403PC += 4;404EatPrefixes();405return;406}407408// The S prefix generates constants, but negate is still respected.409u32 sprefixRemove = VFPU_ANY_SWIZZLE();410u32 sprefixAdd = VFPU_MAKE_CONSTANTS(constant, constant, constant, constant);411ApplyPrefixST(d, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);412413ApplyPrefixD(d, sz);414WriteVector(d, sz, vd);415416EatPrefixes();417PC += 4;418}419420void Int_Viim(MIPSOpcode op) {421int vt = _VT;422s32 imm = SignExtend16ToS32(op & 0xFFFF);423u16 uimm16 = (op&0xFFFF);424float f[1];425int type = (op >> 23) & 7;426if (type == 6) {427f[0] = (float)imm; // viim428} else if (type == 7) {429f[0] = Float16ToFloat32((u16)uimm16); // vfim430} else {431_dbg_assert_msg_( 0, "Invalid Viim opcode type %d", type);432f[0] = 0;433}434435ApplyPrefixD(f, V_Single);436WriteVector(f, V_Single, vt);437PC += 4;438EatPrefixes();439}440441void Int_Vidt(MIPSOpcode op) {442int vd = _VD;443VectorSize sz = GetVecSize(op);444float f[4];445446// The S prefix generates constants, but negate is still respected.447int offmask = sz == V_Quad || sz == V_Triple ? 3 : 1;448int off = vd & offmask;449// If it's a pair, the identity starts in a different position.450VFPUConst constX = off == (0 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;451VFPUConst constY = off == (1 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;452VFPUConst constZ = off == (2 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;453VFPUConst constW = off == (3 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;454455u32 sprefixRemove = VFPU_ANY_SWIZZLE();456u32 sprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);457ApplyPrefixST(f, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);458459ApplyPrefixD(f, sz);460WriteVector(f, sz, vd);461PC += 4;462EatPrefixes();463}464465// The test really needs some work.466void Int_Vmmul(MIPSOpcode op) {467float s[16]{}, t[16]{}, d[16];468469int vd = _VD;470int vs = _VS;471int vt = _VT;472MatrixSize sz = GetMtxSize(op);473int n = GetMatrixSide(sz);474475ReadMatrix(s, sz, vs);476ReadMatrix(t, sz, vt);477478// TODO: Always use the more accurate path in interpreter?479bool useAccurateDot = USE_VFPU_DOT || PSP_CoreParameter().compat.flags().MoreAccurateVMMUL;480for (int a = 0; a < n; a++) {481for (int b = 0; b < n; b++) {482union { float f; uint32_t u; } sum = { 0.0f };483if (a == n - 1 && b == n - 1) {484// S and T prefixes work on the final (or maybe first, in reverse?) dot.485ApplySwizzleS(&s[b * 4], V_Quad);486ApplySwizzleT(&t[a * 4], V_Quad);487}488489if (useAccurateDot) {490sum.f = vfpu_dot(&s[b * 4], &t[a * 4]);491if (my_isnan(sum.f)) {492sum.u = 0x7f800001;493} else if ((sum.u & 0x7F800000) == 0) {494sum.u &= 0xFF800000;495}496} else {497if (a == n - 1 && b == n - 1) {498for (int c = 0; c < 4; c++) {499sum.f += s[b * 4 + c] * t[a * 4 + c];500}501} else {502for (int c = 0; c < n; c++) {503sum.f += s[b * 4 + c] * t[a * 4 + c];504}505}506}507508d[a * 4 + b] = sum.f;509}510}511512// The D prefix applies ONLY to the final element, but sat does work.513u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);514u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);515currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;516ApplyPrefixD(&d[4 * (n - 1)], V_Quad, false);517WriteMatrix(d, sz, vd);518PC += 4;519EatPrefixes();520}521522void Int_Vmscl(MIPSOpcode op) {523float s[16]{}, t[4]{}, d[16];524525int vd = _VD;526int vs = _VS;527int vt = _VT;528MatrixSize sz = GetMtxSize(op);529int n = GetMatrixSide(sz);530531ReadMatrix(s, sz, vs);532ReadVector(t, V_Single, vt);533534for (int a = 0; a < n - 1; a++) {535for (int b = 0; b < n; b++) {536d[a * 4 + b] = s[a * 4 + b] * t[0];537}538}539540// S prefix applies to the last row.541ApplySwizzleS(&s[(n - 1) * 4], V_Quad);542// T prefix applies only for the last row, and is used per element.543// This is like vscl, but instead of zzzz it uses xxxx.544int tlane = (vt >> 5) & 3;545t[tlane] = t[0];546u32 tprefixRemove = VFPU_ANY_SWIZZLE();547u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);548ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);549550for (int b = 0; b < n; b++) {551d[(n - 1) * 4 + b] = s[(n - 1) * 4 + b] * t[b];552}553554// The D prefix is applied to the last row.555ApplyPrefixD(&d[(n - 1) * 4], V_Quad);556WriteMatrix(d, sz, vd);557PC += 4;558EatPrefixes();559}560561void Int_Vmmov(MIPSOpcode op) {562float s[16]{};563int vd = _VD;564int vs = _VS;565MatrixSize sz = GetMtxSize(op);566ReadMatrix(s, sz, vs);567// S and D prefixes are applied to the last row.568int off = GetMatrixSide(sz) - 1;569ApplySwizzleS(&s[off * 4], V_Quad);570ApplyPrefixD(&s[off * 4], V_Quad);571WriteMatrix(s, sz, vd);572PC += 4;573EatPrefixes();574}575576void Int_Vflush(MIPSOpcode op)577{578VERBOSE_LOG(Log::CPU, "vflush");579PC += 4;580// Anything with 0xFC000000 is a nop, but only 0xFFFF0000 retains prefixes.581if ((op & 0xFFFF0000) != 0xFFFF0000)582EatPrefixes();583}584585void Int_VV2Op(MIPSOpcode op) {586float s[4], d[4];587int vd = _VD;588int vs = _VS;589int optype = (op >> 16) & 0x1f;590VectorSize sz = GetVecSize(op);591u32 n = GetNumVectorElements(sz);592ReadVector(s, sz, vs);593// Some of these are prefix hacks (affects constants, etc.)594switch (optype) {595case 1:596ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, VFPU_ABS(1, 1, 1, 1)), sz);597break;598case 2:599ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, VFPU_NEGATE(1, 1, 1, 1)), sz);600break;601case 16:602case 17:603case 18:604case 19:605case 20:606case 21:607case 22:608case 23:609// Similar to vdiv. Some of the behavior using the invalid constant is iffy.610ApplySwizzleS(&s[n - 1], V_Single, INFINITY);611break;612case 24:613case 26:614// Similar to above, but also ignores negate.615ApplyPrefixST(&s[n - 1], VFPURewritePrefix(VFPU_CTRL_SPREFIX, VFPU_NEGATE(1, 0, 0, 0), 0), V_Single, -INFINITY);616break;617case 28:618// Similar to above, but also ignores negate.619ApplyPrefixST(&s[n - 1], VFPURewritePrefix(VFPU_CTRL_SPREFIX, VFPU_NEGATE(1, 0, 0, 0), 0), V_Single, INFINITY);620break;621default:622ApplySwizzleS(s, sz);623break;624}625for (int i = 0; i < (int)n; i++) {626switch (optype) {627case 0: d[i] = s[i]; break; //vmov628case 1: d[i] = s[i]; break; //vabs (prefix)629case 2: d[i] = s[i]; break; //vneg (prefix)630// vsat0 changes -0.0 to +0.0, both retain NAN.631case 4: if (s[i] <= 0) d[i] = 0; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat0632case 5: if (s[i] < -1.0f) d[i] = -1.0f; else {if(s[i] > 1.0f) d[i] = 1.0f; else d[i] = s[i];} break; // vsat1633case 16: { d[i] = vfpu_rcp(s[i]); } break; //vrcp634case 17: d[i] = USE_VFPU_SQRT ? vfpu_rsqrt(s[i]) : 1.0f / sqrtf(s[i]); break; //vrsq635636case 18: { d[i] = vfpu_sin(s[i]); } break; //vsin637case 19: { d[i] = vfpu_cos(s[i]); } break; //vcos638case 20: { d[i] = vfpu_exp2(s[i]); } break; //vexp2639case 21: { d[i] = vfpu_log2(s[i]); } break; //vlog2640case 22: d[i] = USE_VFPU_SQRT ? vfpu_sqrt(s[i]) : fabsf(sqrtf(s[i])); break; //vsqrt641case 23: { d[i] = vfpu_asin(s[i]); } break; //vasin642case 24: { d[i] = -vfpu_rcp(s[i]); } break; // vnrcp643case 26: { d[i] = -vfpu_sin(s[i]); } break; // vnsin644case 28: { d[i] = vfpu_rexp2(s[i]); } break; // vrexp2645default:646_dbg_assert_msg_( false, "Invalid VV2Op op type %d", optype);647break;648}649}650// vsat1 is a prefix hack, so 0:1 doesn't apply. Others don't process sat at all.651switch (optype) {652case 5:653ApplyPrefixD(d, sz, true);654break;655case 16:656case 17:657case 18:658case 19:659case 20:660case 21:661case 22:662case 23:663case 24:664case 26:665case 28:666{667// Only the last element gets the mask applied.668u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);669u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);670currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;671ApplyPrefixD(d, sz);672break;673}674default:675ApplyPrefixD(d, sz);676}677WriteVector(d, sz, vd);678PC += 4;679EatPrefixes();680}681682void Int_Vocp(MIPSOpcode op) {683float s[4], t[4], d[4];684int vd = _VD;685int vs = _VS;686VectorSize sz = GetVecSize(op);687ReadVector(s, sz, vs);688689// S prefix forces the negate flags.690u32 sprefixAdd = VFPU_NEGATE(1, 1, 1, 1);691ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);692693// T prefix forces constants on and regnum to 1.694// That means negate still works, and abs activates a different constant.695u32 tprefixRemove = VFPU_ANY_SWIZZLE();696u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);697ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);698699for (int i = 0; i < GetNumVectorElements(sz); i++) {700// Always positive NaN. Note that s is always negated from the registers.701d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];702}703RetainInvalidSwizzleST(d, sz);704ApplyPrefixD(d, sz);705WriteVector(d, sz, vd);706PC += 4;707EatPrefixes();708}709710void Int_Vsocp(MIPSOpcode op) {711float s[4], t[4], d[4];712int vd = _VD;713int vs = _VS;714VectorSize sz = GetVecSize(op);715VectorSize outSize = GetDoubleVectorSizeSafe(sz);716if (outSize == V_Invalid)717outSize = V_Quad;718ReadVector(s, sz, vs);719720// S prefix forces negate in even/odd and xxyy swizzle.721// abs works, and applies to final position (not source.)722u32 sprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_NEGATE(1, 1, 1, 1);723u32 sprefixAdd = VFPU_SWIZZLE(0, 0, 1, 1) | VFPU_NEGATE(1, 0, 1, 0);724ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), outSize);725726// T prefix forces constants on and regnum to 1, 0, 1, 0.727// That means negate still works, and abs activates a different constant.728u32 tprefixRemove = VFPU_ANY_SWIZZLE();729u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ZERO, VFPUConst::ONE, VFPUConst::ZERO);730ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), outSize);731732// Essentially D prefix saturation is forced.733d[0] = nanclamp(t[0] + s[0], 0.0f, 1.0f);734d[1] = nanclamp(t[1] + s[1], 0.0f, 1.0f);735if (outSize == V_Quad) {736d[2] = nanclamp(t[2] + s[2], 0.0f, 1.0f);737d[3] = nanclamp(t[3] + s[3], 0.0f, 1.0f);738}739ApplyPrefixD(d, sz, true);740WriteVector(d, outSize, vd);741PC += 4;742EatPrefixes();743}744745void Int_Vsgn(MIPSOpcode op) {746float s[4], t[4], d[4];747int vd = _VD;748int vs = _VS;749VectorSize sz = GetVecSize(op);750ReadVector(s, sz, vs);751752// Not sure who would do this, but using abs/neg allows a compare against 3 or -3.753u32 tprefixRemove = VFPU_ANY_SWIZZLE();754u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);755ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);756757int n = GetNumVectorElements(sz);758if (n < 4) {759// Compare with a swizzled value out of bounds always produces 0.760memcpy(&s[n], &t[n], sizeof(float) * (4 - n));761}762ApplySwizzleS(s, V_Quad);763764for (int i = 0; i < n; i++) {765float diff = s[i] - t[i];766// To handle NaNs correctly, we do this with integer hackery767u32 val;768memcpy(&val, &diff, sizeof(u32));769if (val == 0 || val == 0x80000000)770d[i] = 0.0f;771else if ((val >> 31) == 0)772d[i] = 1.0f;773else774d[i] = -1.0f;775}776ApplyPrefixD(d, sz);777WriteVector(d, sz, vd);778PC += 4;779EatPrefixes();780}781782inline int round_vfpu_n(double param) {783// return floorf(param);784return (int)round_ieee_754(param);785}786787void Int_Vf2i(MIPSOpcode op) {788float s[4];789int d[4];790int vd = _VD;791int vs = _VS;792int imm = (op >> 16) & 0x1f;793float mult = (float)(1UL << imm);794VectorSize sz = GetVecSize(op);795ReadVector(s, sz, vs);796// Negate, abs, and constants apply as you'd expect to the bits.797ApplySwizzleS(s, sz);798for (int i = 0; i < GetNumVectorElements(sz); i++) {799if (my_isnan(s[i])) {800d[i] = 0x7FFFFFFF;801continue;802}803double sv = s[i] * mult; // (float)0x7fffffff == (float)0x80000000804// Cap/floor it to 0x7fffffff / 0x80000000805if (sv > (double)0x7fffffff) {806d[i] = 0x7fffffff;807} else if (sv <= (double)(int)0x80000000) {808d[i] = 0x80000000;809} else {810switch ((op >> 21) & 0x1f)811{812case 16: d[i] = (int)round_vfpu_n(sv); break; //(floor(sv + 0.5f)); break; //n813case 17: d[i] = s[i]>=0 ? (int)floor(sv) : (int)ceil(sv); break; //z814case 18: d[i] = (int)ceil(sv); break; //u815case 19: d[i] = (int)floor(sv); break; //d816default: d[i] = 0x7FFFFFFF; break;817}818}819}820// Does not apply sat, but does apply mask.821ApplyPrefixD(reinterpret_cast<float *>(d), sz, true);822WriteVector(reinterpret_cast<float *>(d), sz, vd);823PC += 4;824EatPrefixes();825}826827void Int_Vi2f(MIPSOpcode op) {828int s[4];829float d[4];830int vd = _VD;831int vs = _VS;832int imm = (op >> 16) & 0x1f;833float mult = 1.0f/(float)(1UL << imm);834VectorSize sz = GetVecSize(op);835ReadVector(reinterpret_cast<float *>(s), sz, vs);836// Negate, abs, and constants apply as you'd expect to the bits.837ApplySwizzleS(reinterpret_cast<float *>(s), sz);838for (int i = 0; i < GetNumVectorElements(sz); i++) {839d[i] = (float)s[i] * mult;840}841// Sat and mask apply normally.842ApplyPrefixD(d, sz);843WriteVector(d, sz, vd);844PC += 4;845EatPrefixes();846}847848void Int_Vh2f(MIPSOpcode op) {849u32 s[4];850float d[4];851int vd = _VD;852int vs = _VS;853VectorSize sz = GetVecSize(op);854ReadVector(reinterpret_cast<float *>(s), sz, vs);855ApplySwizzleS(reinterpret_cast<float *>(s), sz);856857VectorSize outsize = V_Pair;858switch (sz) {859case V_Single:860outsize = V_Pair;861d[0] = ExpandHalf(s[0] & 0xFFFF);862d[1] = ExpandHalf(s[0] >> 16);863break;864case V_Pair:865default:866// All other sizes are treated the same.867outsize = V_Quad;868d[0] = ExpandHalf(s[0] & 0xFFFF);869d[1] = ExpandHalf(s[0] >> 16);870d[2] = ExpandHalf(s[1] & 0xFFFF);871d[3] = ExpandHalf(s[1] >> 16);872break;873}874ApplyPrefixD(d, outsize);875WriteVector(d, outsize, vd);876PC += 4;877EatPrefixes();878}879880void Int_Vf2h(MIPSOpcode op) {881float s[4]{};882u32 d[4];883int vd = _VD;884int vs = _VS;885VectorSize sz = GetVecSize(op);886ReadVector(s, sz, vs);887// Swizzle can cause V_Single to properly write both components.888ApplySwizzleS(s, V_Quad);889// Negate should not actually apply to invalid swizzle.890RetainInvalidSwizzleST(s, V_Quad);891892VectorSize outsize = V_Single;893switch (sz) {894case V_Single:895case V_Pair:896outsize = V_Single;897d[0] = ShrinkToHalf(s[0]) | ((u32)ShrinkToHalf(s[1]) << 16);898break;899case V_Triple:900case V_Quad:901outsize = V_Pair;902d[0] = ShrinkToHalf(s[0]) | ((u32)ShrinkToHalf(s[1]) << 16);903d[1] = ShrinkToHalf(s[2]) | ((u32)ShrinkToHalf(s[3]) << 16);904break;905906default:907ERROR_LOG_REPORT(Log::CPU, "vf2h with invalid elements");908break;909}910ApplyPrefixD(reinterpret_cast<float *>(d), outsize);911WriteVector(reinterpret_cast<float *>(d), outsize, vd);912PC += 4;913EatPrefixes();914}915916void Int_Vx2i(MIPSOpcode op) {917u32 s[4], d[4]{};918int vd = _VD;919int vs = _VS;920VectorSize sz = GetVecSize(op);921VectorSize oz = sz;922ReadVector(reinterpret_cast<float *>(s), sz, vs);923ApplySwizzleS(reinterpret_cast<float *>(s), sz);924925// TODO: Similar to colorconv, invalid swizzle seems to reuse last output.926switch ((op >> 16) & 3) {927case 0: // vuc2i928// Quad is the only option.929// This converts 8-bit unsigned to 31-bit signed, swizzling to saturate.930// Similar to 5-bit to 8-bit color swizzling, but clamping to INT_MAX.931{932u32 value = s[0];933for (int i = 0; i < 4; i++) {934d[i] = (u32)((u32)(value & 0xFF) * 0x01010101UL) >> 1;935value >>= 8;936}937oz = V_Quad;938}939break;940941case 1: // vc2i942// Quad is the only option943// Unlike vuc2i, the source and destination are signed so there is no shift.944// It lacks the swizzle because of negative values.945{946u32 value = s[0];947d[0] = (value & 0xFF) << 24;948d[1] = (value & 0xFF00) << 16;949d[2] = (value & 0xFF0000) << 8;950d[3] = (value & 0xFF000000);951oz = V_Quad;952}953break;954955case 2: // vus2i956// Note: for some reason, this skips swizzle such that 0xFFFF -> 0x7FFF8000 unlike vuc2i.957oz = V_Pair;958switch (sz) {959case V_Quad:960case V_Triple:961sz = V_Pair;962// Intentional fallthrough.963case V_Pair:964oz = V_Quad;965// Intentional fallthrough.966case V_Single:967for (int i = 0; i < GetNumVectorElements(sz); i++) {968u32 value = s[i];969d[i * 2] = (value & 0xFFFF) << 15;970d[i * 2 + 1] = (value & 0xFFFF0000) >> 1;971}972break;973974default:975ERROR_LOG_REPORT(Log::CPU, "vus2i with more than 2 elements");976break;977}978break;979980case 3: // vs2i981oz = V_Pair;982switch (sz) {983case V_Quad:984case V_Triple:985sz = V_Pair;986// Intentional fallthrough.987case V_Pair:988oz = V_Quad;989// Intentional fallthrough.990case V_Single:991for (int i = 0; i < GetNumVectorElements(sz); i++) {992u32 value = s[i];993d[i * 2] = (value & 0xFFFF) << 16;994d[i * 2 + 1] = value & 0xFFFF0000;995}996break;997998default:999ERROR_LOG_REPORT(Log::CPU, "vs2i with more than 2 elements");1000break;1001}1002break;10031004default:1005_dbg_assert_msg_( false, "Trying to interpret instruction that can't be interpreted");1006break;1007}10081009// Saturation does in fact apply.1010ApplyPrefixD(reinterpret_cast<float *>(d),oz);1011WriteVector(reinterpret_cast<float *>(d), oz, vd);1012PC += 4;1013EatPrefixes();1014}10151016void Int_Vi2x(MIPSOpcode op) {1017int s[4]{};1018u32 d[2]{};1019const int vd = _VD;1020const int vs = _VS;1021const VectorSize sz = GetVecSize(op);1022VectorSize oz;1023ReadVector(reinterpret_cast<float *>(s), sz, vs);1024// Negate, const, etc. apply as expected.1025ApplySwizzleS(reinterpret_cast<float *>(s), V_Quad);10261027// TODO: Similar to colorconv, invalid swizzle seems to reuse last output.1028switch ((op >> 16) & 3) {1029case 0: //vi2uc1030for (int i = 0; i < 4; i++) {1031int v = s[i];1032if (v < 0) v = 0;1033v >>= 23;1034d[0] |= ((u32)v & 0xFF) << (i * 8);1035}1036oz = V_Single;1037break;10381039case 1: //vi2c1040for (int i = 0; i < 4; i++) {1041u32 v = s[i];1042d[0] |= (v >> 24) << (i * 8);1043}1044oz = V_Single;1045break;10461047case 2: //vi2us1048{1049int elems = (GetNumVectorElements(sz) + 1) / 2;1050for (int i = 0; i < elems; i++) {1051int low = s[i * 2];1052int high = s[i * 2 + 1];1053if (low < 0) low = 0;1054if (high < 0) high = 0;1055low >>= 15;1056high >>= 15;1057d[i] = low | (high << 16);1058}1059switch (sz) {1060case V_Quad: oz = V_Pair; break;1061case V_Triple: oz = V_Pair; break;1062case V_Pair: oz = V_Single; break;1063case V_Single: oz = V_Single; break;1064default:1065_dbg_assert_msg_( false, "Trying to interpret instruction that can't be interpreted");1066oz = V_Single;1067break;1068}1069break;1070}1071case 3: //vi2s1072{1073int elems = (GetNumVectorElements(sz) + 1) / 2;1074for (int i = 0; i < elems; i++) {1075u32 low = s[i * 2];1076u32 high = s[i * 2 + 1];1077low >>= 16;1078high >>= 16;1079d[i] = low | (high << 16);1080}1081switch (sz) {1082case V_Quad: oz = V_Pair; break;1083case V_Triple: oz = V_Pair; break;1084case V_Pair: oz = V_Single; break;1085case V_Single: oz = V_Single; break;1086default:1087_dbg_assert_msg_(0, "Trying to interpret instruction that can't be interpreted");1088oz = V_Single;1089break;1090}1091break;1092}1093default:1094_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");1095oz = V_Single;1096break;1097}1098// D prefix applies as expected.1099ApplyPrefixD(reinterpret_cast<float *>(d), oz);1100WriteVector(reinterpret_cast<float *>(d), oz, vd);1101PC += 4;1102EatPrefixes();1103}11041105void Int_ColorConv(MIPSOpcode op)1106{1107int vd = _VD;1108int vs = _VS;1109u32 s[4];1110VectorSize isz = GetVecSize(op);1111VectorSize sz = V_Quad;1112ReadVector(reinterpret_cast<float *>(s), sz, vs);1113ApplySwizzleS(reinterpret_cast<float *>(s), sz);1114u16 colors[4];1115// TODO: Invalid swizzle values almost seem to use the last value converted in a1116// previous execution of these ops. It's a bit odd.1117for (int i = 0; i < 4; i++)1118{1119u32 in = s[i];1120u16 col = 0;1121switch ((op >> 16) & 3)1122{1123case 1: // 44441124{1125int a = ((in >> 24) & 0xFF) >> 4;1126int b = ((in >> 16) & 0xFF) >> 4;1127int g = ((in >> 8) & 0xFF) >> 4;1128int r = ((in) & 0xFF) >> 4;1129col = (a << 12) | (b << 8) | (g << 4) | (r);1130break;1131}1132case 2: // 55511133{1134int a = ((in >> 24) & 0xFF) >> 7;1135int b = ((in >> 16) & 0xFF) >> 3;1136int g = ((in >> 8) & 0xFF) >> 3;1137int r = ((in) & 0xFF) >> 3;1138col = (a << 15) | (b << 10) | (g << 5) | (r);1139break;1140}1141case 3: // 5651142{1143int b = ((in >> 16) & 0xFF) >> 3;1144int g = ((in >> 8) & 0xFF) >> 2;1145int r = ((in) & 0xFF) >> 3;1146col = (b << 11) | (g << 5) | (r);1147break;1148}1149}1150colors[i] = col;1151}1152u32 ov[2] = {(u32)colors[0] | (colors[1] << 16), (u32)colors[2] | (colors[3] << 16)};1153ApplyPrefixD(reinterpret_cast<float *>(ov), V_Pair);1154WriteVector((const float *)ov, isz == V_Single ? V_Single : V_Pair, vd);1155PC += 4;1156EatPrefixes();1157}11581159void Int_VDot(MIPSOpcode op) {1160float s[4]{}, t[4]{};1161union { float f; uint32_t u; } d;1162int vd = _VD;1163int vs = _VS;1164int vt = _VT;1165VectorSize sz = GetVecSize(op);1166ReadVector(s, sz, vs);1167ApplySwizzleS(s, V_Quad);1168ReadVector(t, sz, vt);1169ApplySwizzleT(t, V_Quad);11701171if (USE_VFPU_DOT) {1172d.f = vfpu_dot(s, t);1173if (my_isnan(d.f)) {1174d.u = 0x7f800001;1175} else if ((d.u & 0x7F800000) == 0) {1176d.u &= 0xFF800000;1177}1178} else {1179d.f = 0.0f;1180for (int i = 0; i < 4; i++) {1181d.f += s[i] * t[i];1182}1183}11841185ApplyPrefixD(&d.f, V_Single);1186WriteVector(&d.f, V_Single, vd);1187PC += 4;1188EatPrefixes();1189}11901191void Int_VHdp(MIPSOpcode op) {1192float s[4]{}, t[4]{};1193float d;1194int vd = _VD;1195int vs = _VS;1196int vt = _VT;1197VectorSize sz = GetVecSize(op);1198ReadVector(s, sz, vs);1199ReadVector(t, sz, vt);1200ApplySwizzleT(t, V_Quad);12011202// S prefix forces constant 1 for the last element (w for quad.)1203// Otherwise it is the same as vdot.1204u32 sprefixRemove;1205u32 sprefixAdd;1206if (sz == V_Quad) {1207sprefixRemove = VFPU_SWIZZLE(0, 0, 0, 3);1208sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::NONE, VFPUConst::NONE, VFPUConst::ONE);1209} else if (sz == V_Triple) {1210sprefixRemove = VFPU_SWIZZLE(0, 0, 3, 0);1211sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::NONE, VFPUConst::ONE, VFPUConst::NONE);1212} else if (sz == V_Pair) {1213sprefixRemove = VFPU_SWIZZLE(0, 3, 0, 0);1214sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::ONE, VFPUConst::NONE, VFPUConst::NONE);1215} else {1216sprefixRemove = VFPU_SWIZZLE(3, 0, 0, 0);1217sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::NONE, VFPUConst::NONE, VFPUConst::NONE);1218}1219ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Quad);12201221float sum = 0.0f;1222if (USE_VFPU_DOT) {1223sum = vfpu_dot(s, t);1224} else {1225for (int i = 0; i < 4; i++) {1226sum += s[i] * t[i];1227}1228}1229d = my_isnan(sum) ? fabsf(sum) : sum;1230ApplyPrefixD(&d, V_Single);1231WriteVector(&d, V_Single, vd);1232PC += 4;1233EatPrefixes();1234}12351236void Int_Vbfy(MIPSOpcode op) {1237float s[4]{}, t[4]{}, d[4];1238int vd = _VD;1239int vs = _VS;1240VectorSize sz = GetVecSize(op);1241ReadVector(s, sz, vs);1242ReadVector(t, sz, vs);12431244if (op & 0x10000) {1245// vbfy21246// S prefix forces the negate flags (so z and w are negative.)1247u32 sprefixAdd = VFPU_NEGATE(0, 0, 1, 1);1248ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);12491250// T prefix forces swizzle (zwxy.)1251// That means negate still works, but constants are a bit weird.1252u32 tprefixRemove = VFPU_ANY_SWIZZLE();1253u32 tprefixAdd = VFPU_SWIZZLE(2, 3, 0, 1);1254ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);12551256// Other sizes don't seem completely predictable.1257if (sz != V_Quad) {1258ERROR_LOG_REPORT_ONCE(vbfy2, Log::CPU, "vfby2 with incorrect size");1259}1260} else {1261// vbfy11262// S prefix forces the negate flags (so y and w are negative.)1263u32 sprefixAdd = VFPU_NEGATE(0, 1, 0, 1);1264ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);12651266// T prefix forces swizzle (yxwz.)1267// That means negate still works, but constants are a bit weird.1268u32 tprefixRemove = VFPU_ANY_SWIZZLE();1269u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);1270ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);12711272if (sz != V_Quad && sz != V_Pair) {1273ERROR_LOG_REPORT_ONCE(vbfy2, Log::CPU, "vfby1 with incorrect size");1274}1275}12761277d[0] = s[0] + t[0];1278d[1] = s[1] + t[1];1279d[2] = s[2] + t[2];1280d[3] = s[3] + t[3];12811282ApplyPrefixD(d, sz);1283WriteVector(d, sz, vd);1284PC += 4;1285EatPrefixes();1286}12871288void Int_Vsrt1(MIPSOpcode op) {1289float s[4], t[4], d[4];1290int vd = _VD;1291int vs = _VS;1292VectorSize sz = GetVecSize(op);1293ReadVector(s, sz, vs);1294ApplySwizzleS(s, sz);1295ReadVector(t, sz, vs);12961297// T is force swizzled to yxwz from S.1298u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);1299u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);1300ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);13011302// TODO: May mishandle NAN / negative zero / etc.1303d[0] = std::min(s[0], t[0]);1304d[1] = std::max(s[1], t[1]);1305d[2] = std::min(s[2], t[2]);1306d[3] = std::max(s[3], t[3]);1307RetainInvalidSwizzleST(d, sz);1308ApplyPrefixD(d, sz);1309WriteVector(d, sz, vd);1310PC += 4;1311EatPrefixes();1312}13131314void Int_Vsrt2(MIPSOpcode op) {1315float s[4], t[4], d[4];1316int vd = _VD;1317int vs = _VS;1318VectorSize sz = GetVecSize(op);1319ReadVector(s, sz, vs);1320ApplySwizzleS(s, sz);1321ReadVector(t, sz, vs);13221323// T is force swizzled to wzyx from S.1324u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);1325u32 tprefixAdd = VFPU_SWIZZLE(3, 2, 1, 0);1326ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);13271328// TODO: May mishandle NAN / negative zero / etc.1329d[0] = std::min(s[0], t[0]);1330d[1] = std::min(s[1], t[1]);1331d[2] = std::max(s[2], t[2]);1332d[3] = std::max(s[3], t[3]);1333RetainInvalidSwizzleST(d, sz);1334ApplyPrefixD(d, sz);1335WriteVector(d, sz, vd);1336PC += 4;1337EatPrefixes();1338}13391340void Int_Vsrt3(MIPSOpcode op) {1341float s[4], t[4], d[4];1342int vd = _VD;1343int vs = _VS;1344VectorSize sz = GetVecSize(op);1345ReadVector(s, sz, vs);1346ApplySwizzleS(s, sz);1347ReadVector(t, sz, vs);13481349// T is force swizzled to yxwz from S.1350u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);1351u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);1352ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);13531354// TODO: May mishandle NAN / negative zero / etc.1355d[0] = std::max(s[0], t[0]);1356d[1] = std::min(s[1], t[1]);1357d[2] = std::max(s[2], t[2]);1358d[3] = std::min(s[3], t[3]);1359RetainInvalidSwizzleST(d, sz);1360ApplyPrefixD(d, sz);1361WriteVector(d, sz, vd);1362PC += 4;1363EatPrefixes();1364}13651366void Int_Vsrt4(MIPSOpcode op) {1367float s[4], t[4], d[4];1368int vd = _VD;1369int vs = _VS;1370VectorSize sz = GetVecSize(op);1371ReadVector(s, sz, vs);1372ApplySwizzleS(s, sz);1373ReadVector(t, sz, vs);13741375// T is force swizzled to wzyx from S.1376u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);1377u32 tprefixAdd = VFPU_SWIZZLE(3, 2, 1, 0);1378ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);13791380// TODO: May mishandle NAN / negative zero / etc.1381d[0] = std::max(s[0], t[0]);1382d[1] = std::max(s[1], t[1]);1383d[2] = std::min(s[2], t[2]);1384d[3] = std::min(s[3], t[3]);1385RetainInvalidSwizzleST(d, sz);1386ApplyPrefixD(d, sz);1387WriteVector(d, sz, vd);1388PC += 4;1389EatPrefixes();1390}13911392void Int_Vcrs(MIPSOpcode op) {1393//half a cross product1394float s[4]{}, t[4]{}, d[4];1395int vd = _VD;1396int vs = _VS;1397int vt = _VT;1398VectorSize sz = GetVecSize(op);1399ReadVector(s, sz, vs);1400ReadVector(t, sz, vt);14011402// S prefix forces swizzle (yzx?.)1403// That means negate still works, but constants are a bit weird.1404u32 sprefixRemove = VFPU_SWIZZLE(3, 3, 3, 0);1405u32 sprefixAdd = VFPU_SWIZZLE(1, 2, 0, 0);1406ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);14071408// T prefix forces swizzle (zxy?.)1409u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 0);1410u32 tprefixAdd = VFPU_SWIZZLE(2, 0, 1, 0);1411ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);14121413d[0] = s[0] * t[0];1414d[1] = s[1] * t[1];1415d[2] = s[2] * t[2];1416d[3] = s[3] * t[3];1417ApplyPrefixD(d, sz);1418WriteVector(d, sz, vd);1419PC += 4;1420EatPrefixes();1421}14221423void Int_Vdet(MIPSOpcode op) {1424float s[4]{}, t[4]{}, d[4];1425int vd = _VD;1426int vs = _VS;1427int vt = _VT;1428VectorSize sz = GetVecSize(op);1429// This is normally V_Pair. Unfilled s/t values are treated as zero.1430ReadVector(s, sz, vs);1431ApplySwizzleS(s, V_Quad);1432ReadVector(t, sz, vt);14331434// T prefix forces swizzle for x and y (yx??.)1435// That means negate still works, but constants are a bit weird.1436// Note: there is no forced negation here.1437u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 0, 0);1438u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 0, 0);1439ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);14401441if (USE_VFPU_DOT) {1442s[1] = -s[1];1443d[0] = vfpu_dot(s, t);1444} else {1445d[0] = s[0] * t[0] - s[1] * t[1];1446d[0] += s[2] * t[2] + s[3] * t[3];1447}14481449ApplyPrefixD(d, V_Single);1450WriteVector(d, V_Single, vd);1451PC += 4;1452EatPrefixes();1453}14541455void Int_Vfad(MIPSOpcode op) {1456float s[4]{}, t[4]{};1457float d;1458int vd = _VD;1459int vs = _VS;1460VectorSize sz = GetVecSize(op);1461ReadVector(s, sz, vs);1462ApplySwizzleS(s, V_Quad);14631464// T prefix generates constants, but abs can change the constant.1465u32 tprefixRemove = VFPU_ANY_SWIZZLE();1466u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);1467ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);14681469if (USE_VFPU_DOT) {1470d = vfpu_dot(s, t);1471} else {1472d = 0.0f;1473for (int i = 0; i < 4; i++) {1474d += s[i] * t[i];1475}1476}1477ApplyPrefixD(&d, V_Single);1478WriteVector(&d, V_Single, vd);1479PC += 4;1480EatPrefixes();1481}14821483void Int_Vavg(MIPSOpcode op) {1484float s[4]{}, t[4]{};1485float d;1486int vd = _VD;1487int vs = _VS;1488VectorSize sz = GetVecSize(op);1489ReadVector(s, sz, vs);1490ApplySwizzleS(s, V_Quad);14911492// T prefix generates constants, but supports negate.1493u32 tprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_ABS(1, 1, 1, 1);1494u32 tprefixAdd;1495if (sz == V_Single)1496tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);1497else if (sz == V_Pair)1498tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF);1499else if (sz == V_Triple)1500tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD);1501else if (sz == V_Quad)1502tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH);1503else1504tprefixAdd = 0;1505ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);15061507if (USE_VFPU_DOT) {1508d = vfpu_dot(s, t);1509} else {1510d = 0.0f;1511for (int i = 0; i < 4; i++) {1512d += s[i] * t[i];1513}1514}1515ApplyPrefixD(&d, V_Single);1516WriteVector(&d, V_Single, vd);1517PC += 4;1518EatPrefixes();1519}15201521void Int_VScl(MIPSOpcode op) {1522float s[4], t[4], d[4];1523int vd = _VD;1524int vs = _VS;1525int vt = _VT;1526VectorSize sz = GetVecSize(op);1527ReadVector(s, sz, vs);1528ApplySwizzleS(s, sz);15291530// T prefix forces swizzle (zzzz for some reason, so we force V_Quad.)1531// That means negate still works, but constants are a bit weird.1532int tlane = (vt >> 5) & 3;1533t[tlane] = V(vt);1534u32 tprefixRemove = VFPU_ANY_SWIZZLE();1535u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);1536ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);15371538int n = GetNumVectorElements(sz);1539for (int i = 0; i < n; i++) {1540d[i] = s[i] * t[i];1541}1542ApplyPrefixD(d, sz);1543WriteVector(d, sz, vd);1544PC += 4;1545EatPrefixes();1546}15471548void Int_Vrnds(MIPSOpcode op) {1549int vd = _VD;1550int seed = VI(vd);1551// Swizzles apply a constant value, constants/abs/neg work to vary the seed.1552ApplySwizzleS(reinterpret_cast<float *>(&seed), V_Single);1553vrnd_init(uint32_t(seed), currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0);1554PC += 4;1555EatPrefixes();1556}15571558void Int_VrndX(MIPSOpcode op) {1559FloatBits d;1560int vd = _VD;1561VectorSize sz = GetVecSize(op);1562u32 n = GetNumVectorElements(sz);1563// Values are written in backwards order.1564for (int i = n - 1; i >= 0; i--) {1565switch ((op >> 16) & 0x1f) {1566case 1: d.u[i] = vrnd_generate(currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0); break; // vrndi1567case 2: d.u[i] = 0x3F800000 | (vrnd_generate(currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0) & 0x007FFFFF); break; // vrndf1 (>= 1, < 2)1568case 3: d.u[i] = 0x40000000 | (vrnd_generate(currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0) & 0x007FFFFF); break; // vrndf2 (>= 2, < 4)1569default: _dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");1570}1571}1572// D prefix is broken and applies to the last element only (mask and sat.)1573u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);1574u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);1575currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;1576ApplyPrefixD(d.f, sz);1577WriteVector(d.f, sz, vd);1578PC += 4;1579EatPrefixes();1580}15811582// Generates one line of a rotation matrix around one of the three axes1583void Int_Vrot(MIPSOpcode op) {1584float d[4]{};1585int vd = _VD;1586int vs = _VS;1587int imm = (op >> 16) & 0x1f;1588VectorSize sz = GetVecSize(op);1589bool negSin = (imm & 0x10) != 0;1590int sineLane = (imm >> 2) & 3;1591int cosineLane = imm & 3;15921593float sine, cosine;1594if (currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] == 0x000E4) {1595vfpu_sincos(V(vs), sine, cosine);1596if (negSin)1597sine = -sine;1598} else {1599// Swizzle on S is a bit odd here, but generally only applies to sine.1600float s[4]{};1601ReadVector(s, V_Single, vs);1602u32 sprefixRemove = VFPU_NEGATE(1, 0, 0, 0);1603// We apply negSin later, not here. This handles zero a bit better.1604u32 sprefixAdd = VFPU_NEGATE(0, 0, 0, 0);1605ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Single);16061607// Cosine ignores all prefixes, so take the original.1608cosine = vfpu_cos(V(vs));1609sine = vfpu_sin(s[0]);16101611if (negSin)1612sine = -sine;1613RetainInvalidSwizzleST(&sine, V_Single);1614}16151616if (sineLane == cosineLane) {1617for (int i = 0; i < 4; i++)1618d[i] = sine;1619} else {1620d[sineLane] = sine;1621}16221623if (((vd >> 2) & 7) == ((vs >> 2) & 7)) {1624u8 dregs[4]{};1625GetVectorRegs(dregs, sz, vd);1626// Calculate cosine based on sine/zero result.1627bool written = false;1628for (int i = 0; i < 4; i++) {1629if (vs == dregs[i]) {1630d[cosineLane] = vfpu_cos(d[i]);1631written = true;1632break;1633}1634}1635if (!written)1636d[cosineLane] = cosine;1637} else {1638d[cosineLane] = cosine;1639}16401641// D prefix works, just not for the cosine lane.1642uint32_t dprefixRemove = (3 << cosineLane) | (1 << (8 + cosineLane));1643currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] &= 0xFFFFF ^ dprefixRemove;1644ApplyPrefixD(d, sz);1645WriteVector(d, sz, vd);1646PC += 4;1647EatPrefixes();1648}16491650void Int_Vtfm(MIPSOpcode op) {1651float s[16]{}, t[4]{};1652FloatBits d;1653int vd = _VD;1654int vs = _VS;1655int vt = _VT;1656int ins = (op >> 23) & 3;16571658VectorSize sz = (VectorSize)(ins + 1);1659MatrixSize msz = (MatrixSize)(ins + 1);1660int n = GetNumVectorElements(GetVecSize(op));16611662int tn = std::min(n, ins + 1);1663ReadMatrix(s, msz, vs);1664ReadVector(t, sz, vt);16651666if (USE_VFPU_DOT) {1667float t2[4];1668for (int i = 0; i < 4; i++) {1669if (i < tn) {1670t2[i] = t[i];1671} else if (i == ins) {1672t2[i] = 1.0f;1673} else {1674t2[i] = 0.0f;1675}1676}16771678for (int i = 0; i < ins; i++) {1679d.f[i] = vfpu_dot(&s[i * 4], t2);16801681if (my_isnan(d.f[i])) {1682d.u[i] = 0x7f800001;1683} else if ((d.u[i] & 0x7F800000) == 0) {1684d.u[i] &= 0xFF800000;1685}1686}1687} else {1688for (int i = 0; i < ins; i++) {1689d.f[i] = s[i * 4] * t[0];1690for (int k = 1; k < tn; k++) {1691d.f[i] += s[i * 4 + k] * t[k];1692}1693if (ins >= n) {1694d.f[i] += s[i * 4 + ins];1695}1696}1697}16981699// S and T prefixes apply for the final row only.1700// The T prefix is used to apply zero/one constants, but abs still changes it.1701ApplySwizzleS(&s[ins * 4], V_Quad);1702VFPUConst constX = VFPUConst::NONE;1703VFPUConst constY = n < 2 ? VFPUConst::ZERO : VFPUConst::NONE;1704VFPUConst constZ = n < 3 ? VFPUConst::ZERO : VFPUConst::NONE;1705VFPUConst constW = n < 4 ? VFPUConst::ZERO : VFPUConst::NONE;1706if (ins >= n) {1707if (ins == 1) {1708constY = VFPUConst::ONE;1709} else if (ins == 2) {1710constZ = VFPUConst::ONE;1711} else if (ins == 3) {1712constW = VFPUConst::ONE;1713}1714}1715u32 tprefixRemove = VFPU_SWIZZLE(0, n < 2 ? 3 : 0, n < 3 ? 3 : 0, n < 4 ? 3 : 0);1716u32 tprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);1717ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);17181719// Really this is the operation all rows probably use (with constant wiring.)1720if (USE_VFPU_DOT) {1721d.f[ins] = vfpu_dot(&s[ins * 4], t);17221723if (my_isnan(d.f[ins])) {1724d.u[ins] = 0x7f800001;1725} else if ((d.u[ins] & 0x7F800000) == 0) {1726d.u[ins] &= 0xFF800000;1727}1728} else {1729d.f[ins] = s[ins * 4] * t[0];1730for (int k = 1; k < 4; k++) {1731d.f[ins] += s[ins * 4 + k] * t[k];1732}1733}17341735// D prefix applies to the last element only.1736u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << ins;1737u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (ins + ins);1738currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;1739ApplyPrefixD(d.f, sz);1740WriteVector(d.f, sz, vd);1741PC += 4;1742EatPrefixes();1743}17441745void Int_SV(MIPSOpcode op)1746{1747s32 imm = SignExtend16ToS32(op & 0xFFFC);1748int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);1749int rs = _RS;1750u32 addr = R(rs) + imm;17511752switch (op >> 26)1753{1754case 50: //lv.s1755VI(vt) = Memory::Read_U32(addr);1756break;1757case 58: //sv.s1758Memory::Write_U32(VI(vt), addr);1759break;1760default:1761_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");1762break;1763}1764PC += 4;1765}176617671768void Int_Mftv(MIPSOpcode op)1769{1770int imm = op & 0xFF;1771int rt = _RT;1772switch ((op >> 21) & 0x1f)1773{1774case 3: //mfv / mfvc1775// rt = 0, imm = 255 appears to be used as a CPU interlock by some games.1776if (rt != 0) {1777if (imm < 128) {1778R(rt) = VI(imm);1779} else if (imm < 128 + VFPU_CTRL_MAX) { //mfvc1780R(rt) = currentMIPS->vfpuCtrl[imm - 128];1781} else {1782//ERROR - maybe need to make this value too an "interlock" value?1783_dbg_assert_msg_(false,"mfv - invalid register");1784}1785}1786break;17871788case 7: //mtv1789if (imm < 128) {1790VI(imm) = R(rt);1791} else if (imm < 128 + VFPU_CTRL_MAX) { //mtvc1792u32 mask;1793if (GetVFPUCtrlMask(imm - 128, &mask)) {1794currentMIPS->vfpuCtrl[imm - 128] = R(rt) & mask;1795}1796} else {1797//ERROR1798_dbg_assert_msg_(false,"mtv - invalid register");1799}1800break;18011802default:1803_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");1804break;1805}1806PC += 4;1807}18081809void Int_Vmfvc(MIPSOpcode op) {1810int vd = _VD;1811int imm = (op >> 8) & 0x7F;1812if (imm < VFPU_CTRL_MAX) {1813VI(vd) = currentMIPS->vfpuCtrl[imm];1814} else {1815VI(vd) = 0;1816}1817PC += 4;1818}18191820void Int_Vmtvc(MIPSOpcode op) {1821int vs = _VS;1822int imm = op & 0x7F;1823if (imm < VFPU_CTRL_MAX) {1824u32 mask;1825if (GetVFPUCtrlMask(imm, &mask)) {1826currentMIPS->vfpuCtrl[imm] = VI(vs) & mask;1827}1828}1829PC += 4;1830}18311832void Int_Vcst(MIPSOpcode op)1833{1834int conNum = (op >> 16) & 0x1f;1835int vd = _VD;18361837VectorSize sz = GetVecSize(op);1838float c = cst_constants[conNum];1839float temp[4] = {c,c,c,c};1840ApplyPrefixD(temp, sz);1841WriteVector(temp, sz, vd);1842PC += 4;1843EatPrefixes();1844}18451846void Int_Vcmp(MIPSOpcode op)1847{1848int vs = _VS;1849int vt = _VT;1850int cond = op & 0xf;1851VectorSize sz = GetVecSize(op);1852int n = GetNumVectorElements(sz);1853float s[4];1854float t[4];1855ReadVector(s, sz, vs);1856ApplySwizzleS(s, sz);1857ReadVector(t, sz, vt);1858ApplySwizzleT(t, sz);1859int cc = 0;1860int or_val = 0;1861int and_val = 1;1862int affected_bits = (1 << 4) | (1 << 5); // 4 and 51863for (int i = 0; i < n; i++)1864{1865int c;1866// These set c to 0 or 1, nothing else.1867switch (cond)1868{1869case VC_FL: c = 0; break;1870case VC_EQ: c = s[i] == t[i]; break;1871case VC_LT: c = s[i] < t[i]; break;1872case VC_LE: c = s[i] <= t[i]; break;18731874case VC_TR: c = 1; break;1875case VC_NE: c = s[i] != t[i]; break;1876case VC_GE: c = s[i] >= t[i]; break;1877case VC_GT: c = s[i] > t[i]; break;18781879case VC_EZ: c = s[i] == 0.0f || s[i] == -0.0f; break;1880case VC_EN: c = my_isnan(s[i]); break;1881case VC_EI: c = my_isinf(s[i]); break;1882case VC_ES: c = my_isnanorinf(s[i]); break; // Tekken Dark Resurrection18831884case VC_NZ: c = s[i] != 0; break;1885case VC_NN: c = !my_isnan(s[i]); break;1886case VC_NI: c = !my_isinf(s[i]); break;1887case VC_NS: c = !(my_isnanorinf(s[i])); break; // How about t[i] ?18881889default:1890_dbg_assert_msg_(false,"Unsupported vcmp condition code %d", cond);1891PC += 4;1892EatPrefixes();1893return;1894}1895cc |= (c<<i);1896or_val |= c;1897and_val &= c;1898affected_bits |= 1 << i;1899}1900// Use masking to only change the affected bits1901currentMIPS->vfpuCtrl[VFPU_CTRL_CC] =1902(currentMIPS->vfpuCtrl[VFPU_CTRL_CC] & ~affected_bits) |1903((cc | (or_val << 4) | (and_val << 5)) & affected_bits);1904PC += 4;1905EatPrefixes();1906}19071908void Int_Vminmax(MIPSOpcode op) {1909FloatBits s, t, d;1910int vt = _VT;1911int vs = _VS;1912int vd = _VD;1913int cond = op&15;1914VectorSize sz = GetVecSize(op);1915int numElements = GetNumVectorElements(sz);19161917ReadVector(s.f, sz, vs);1918ApplySwizzleS(s.f, sz);1919ReadVector(t.f, sz, vt);1920ApplySwizzleT(t.f, sz);19211922// If both are zero, take t's sign.1923// Otherwise: -NAN < -INF < real < INF < NAN (higher mantissa is farther from 0.)19241925switch ((op >> 23) & 3) {1926case 2: // vmin1927for (int i = 0; i < numElements; i++) {1928if (my_isnanorinf(s.f[i]) || my_isnanorinf(t.f[i])) {1929// If both are negative, we flip the comparison (not two's compliment.)1930if (s.i[i] < 0 && t.i[i] < 0) {1931// If at least one side is NAN, we take the highest mantissa bits.1932d.i[i] = std::max(t.i[i], s.i[i]);1933} else {1934// Otherwise, we take the lowest value (negative or lowest mantissa.)1935d.i[i] = std::min(t.i[i], s.i[i]);1936}1937} else {1938d.f[i] = std::min(t.f[i], s.f[i]);1939}1940}1941break;1942case 3: // vmax1943for (int i = 0; i < numElements; i++) {1944// This is the same logic as vmin, just reversed.1945if (my_isnanorinf(s.f[i]) || my_isnanorinf(t.f[i])) {1946if (s.i[i] < 0 && t.i[i] < 0) {1947d.i[i] = std::min(t.i[i], s.i[i]);1948} else {1949d.i[i] = std::max(t.i[i], s.i[i]);1950}1951} else {1952d.f[i] = std::max(t.f[i], s.f[i]);1953}1954}1955break;1956default:1957_dbg_assert_msg_(false,"unknown min/max op %d", cond);1958PC += 4;1959EatPrefixes();1960return;1961}1962RetainInvalidSwizzleST(d.f, sz);1963ApplyPrefixD(d.f, sz);1964WriteVector(d.f, sz, vd);1965PC += 4;1966EatPrefixes();1967}19681969void Int_Vscmp(MIPSOpcode op) {1970FloatBits s, t, d;1971int vt = _VT;1972int vs = _VS;1973int vd = _VD;1974VectorSize sz = GetVecSize(op);1975ReadVector(s.f, sz, vs);1976ApplySwizzleS(s.f, sz);1977ReadVector(t.f, sz, vt);1978ApplySwizzleT(t.f, sz);1979int n = GetNumVectorElements(sz);1980for (int i = 0; i < n ; i++) {1981float a = s.f[i] - t.f[i];1982if (my_isnan(a)) {1983// NAN/INF are treated as just larger numbers, as in vmin/vmax.1984int sMagnitude = s.u[i] & 0x7FFFFFFF;1985int tMagnitude = t.u[i] & 0x7FFFFFFF;1986int b = (s.i[i] < 0 ? -sMagnitude : sMagnitude) - (t.i[i] < 0 ? -tMagnitude : tMagnitude);1987d.f[i] = (float)((0 < b) - (b < 0));1988} else {1989d.f[i] = (float)((0.0f < a) - (a < 0.0f));1990}1991}1992RetainInvalidSwizzleST(d.f, sz);1993ApplyPrefixD(d.f, sz);1994WriteVector(d.f, sz, vd);1995PC += 4;1996EatPrefixes();1997}19981999void Int_Vsge(MIPSOpcode op) {2000float s[4], t[4], d[4];2001int vt = _VT;2002int vs = _VS;2003int vd = _VD;2004VectorSize sz = GetVecSize(op);2005int numElements = GetNumVectorElements(sz);2006ReadVector(s, sz, vs);2007ApplySwizzleS(s, sz);2008ReadVector(t, sz, vt);2009ApplySwizzleT(t, sz);2010for (int i = 0; i < numElements; i++) {2011if ( my_isnan(s[i]) || my_isnan(t[i]) )2012d[i] = 0.0f;2013else2014d[i] = s[i] >= t[i] ? 1.0f : 0.0f;2015}2016RetainInvalidSwizzleST(d, sz);2017// The clamp cannot matter, so skip it.2018ApplyPrefixD(d, sz, true);2019WriteVector(d, sz, vd);2020PC += 4;2021EatPrefixes();2022}20232024void Int_Vslt(MIPSOpcode op) {2025float s[4], t[4], d[4];2026int vt = _VT;2027int vs = _VS;2028int vd = _VD;2029VectorSize sz = GetVecSize(op);2030int numElements = GetNumVectorElements(sz);2031ReadVector(s, sz, vs);2032ApplySwizzleS(s, sz);2033ReadVector(t, sz, vt);2034ApplySwizzleT(t, sz);2035for (int i = 0; i < numElements; i++) {2036if ( my_isnan(s[i]) || my_isnan(t[i]) )2037d[i] = 0.0f;2038else2039d[i] = s[i] < t[i] ? 1.0f : 0.0f;2040}2041RetainInvalidSwizzleST(d, sz);2042// The clamp cannot matter, so skip it.2043ApplyPrefixD(d, sz, true);2044WriteVector(d, sz, vd);2045PC += 4;2046EatPrefixes();2047}204820492050void Int_Vcmov(MIPSOpcode op) {2051int vs = _VS;2052int vd = _VD;2053int tf = (op >> 19) & 1;2054int imm3 = (op >> 16) & 7;2055VectorSize sz = GetVecSize(op);2056int n = GetNumVectorElements(sz);2057float s[4];2058float d[4];2059ReadVector(s, sz, vs);2060ApplySwizzleS(s, sz);2061// Not only is D read (as T), but the T prefix applies to it.2062ReadVector(d, sz, vd);2063ApplySwizzleT(d, sz);20642065int CC = currentMIPS->vfpuCtrl[VFPU_CTRL_CC];20662067if (imm3 < 6) {2068if (((CC >> imm3) & 1) == !tf) {2069for (int i = 0; i < n; i++)2070d[i] = s[i];2071}2072} else if (imm3 == 6) {2073for (int i = 0; i < n; i++) {2074if (((CC >> i) & 1) == !tf)2075d[i] = s[i];2076}2077} else {2078ERROR_LOG_REPORT(Log::CPU, "Bad Imm3 in cmov: %d", imm3);2079}2080ApplyPrefixD(d, sz);2081WriteVector(d, sz, vd);2082PC += 4;2083EatPrefixes();2084}20852086void Int_VecDo3(MIPSOpcode op) {2087float s[4], t[4];2088FloatBits d;2089int vd = _VD;2090int vs = _VS;2091int vt = _VT;2092VectorSize sz = GetVecSize(op);20932094int optype = 0;2095switch (op >> 26) {2096case 24: //VFPU02097switch ((op >> 23) & 7) {2098case 0: optype = 0; break;2099case 1: optype = 1; break;2100case 7: optype = 7; break;2101default: goto bad;2102}2103break;2104case 25: //VFPU12105switch ((op >> 23) & 7) {2106case 0: optype = 8; break;2107default: goto bad;2108}2109break;2110default:2111bad:2112_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");2113break;2114}21152116u32 n = GetNumVectorElements(sz);2117ReadVector(s, sz, vs);2118ReadVector(t, sz, vt);2119if (optype != 7) {2120ApplySwizzleS(s, sz);2121ApplySwizzleT(t, sz);2122} else {2123// The prefix handling of S/T is a bit odd, probably the HW doesn't do it in parallel.2124// The X prefix is applied to the last element in sz.2125// TODO: This doesn't match exactly for a swizzle past x in some cases...2126ApplySwizzleS(&s[n - 1], V_Single, -INFINITY);2127ApplySwizzleT(&t[n - 1], V_Single, -INFINITY);2128}21292130for (int i = 0; i < (int)n; i++) {2131switch (optype) {2132case 0: d.f[i] = s[i] + t[i]; break; //vadd2133case 1: d.f[i] = s[i] - t[i]; break; //vsub2134case 7: d.f[i] = s[i] / t[i]; break; //vdiv2135case 8: d.f[i] = s[i] * t[i]; break; //vmul2136}21372138if (USE_VFPU_DOT) {2139if (my_isnan(d.f[i])) {2140d.u[i] = (d.u[i] & 0xff800001) | 1;2141} else if ((d.u[i] & 0x7F800000) == 0) {2142d.u[i] &= 0xFF800000;2143}2144}2145}21462147// For vdiv only, the D prefix only applies mask (and like S/T, x applied to last.)2148if (optype == 7) {2149u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);2150u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);2151currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;2152ApplyPrefixD(d.f, sz);2153} else {2154RetainInvalidSwizzleST(d.f, sz);2155ApplyPrefixD(d.f, sz);2156}2157WriteVector(d.f, sz, vd);2158PC += 4;2159EatPrefixes();2160}21612162void Int_CrossQuat(MIPSOpcode op) {2163float s[4]{}, t[4]{}, d[4];2164int vd = _VD;2165int vs = _VS;2166int vt = _VT;2167VectorSize sz = GetVecSize(op);2168u32 n = GetNumVectorElements(sz);2169ReadVector(s, sz, vs);2170ReadVector(t, sz, vt);21712172u32 tprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_NEGATE(1, 1, 1, 1);2173u32 tprefixAdd;21742175switch (sz) {2176case V_Triple: // vcrsp.t2177{2178if (USE_VFPU_DOT) {2179float t0[4] = { 0.0f, t[2], -t[1], 0.0f };2180float t1[4] = { -t[2], 0.0f, t[0], 0.0f };2181d[0] = vfpu_dot(s, t0);2182d[1] = vfpu_dot(s, t1);2183} else {2184d[0] = s[1] * t[2] - s[2] * t[1];2185d[1] = s[2] * t[0] - s[0] * t[2];2186}21872188// T prefix forces swizzle and negate, can be used to have weird constants.2189tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2) | VFPU_NEGATE(0, 1, 0, 0);2190ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);2191ApplySwizzleS(s, V_Quad);2192if (USE_VFPU_DOT) {2193// TODO: But flush any infs to 0? This seems sketchy.2194for (int i = 0; i < 4; ++i) {2195if (my_isinf(s[i]))2196s[i] = 0.0f;2197if (my_isinf(t[i]))2198t[i] = 0.0f;2199}2200d[2] = vfpu_dot(s, t);2201} else {2202d[2] = s[0] * t[0] + s[1] * t[1] + s[2] * t[2] + s[3] * t[3];2203}2204break;2205}22062207case V_Quad: // vqmul.q2208{2209if (USE_VFPU_DOT) {2210float t0[4] = { t[3], t[2], -t[1], t[0] };2211float t1[4] = { -t[2], t[3], t[0], t[1] };2212float t2[4] = { t[1], -t[0], t[3], t[2] };2213d[0] = vfpu_dot(s, t0);2214d[1] = vfpu_dot(s, t1);2215d[2] = vfpu_dot(s, t2);2216} else {2217d[0] = s[0] * t[3] + s[1] * t[2] - s[2] * t[1] + s[3] * t[0];2218d[1] = -s[0] * t[2] + s[1] * t[3] + s[2] * t[0] + s[3] * t[1];2219d[2] = s[0] * t[1] - s[1] * t[0] + s[2] * t[3] + s[3] * t[2];2220}22212222// T prefix forces swizzle and negate, can be used to have weird constants.2223tprefixAdd = VFPU_SWIZZLE(0, 1, 2, 3) | VFPU_NEGATE(1, 1, 1, 0);2224ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);2225ApplySwizzleS(s, sz);2226if (USE_VFPU_DOT)2227d[3] = vfpu_dot(s, t);2228else2229d[3] = s[0] * t[0] + s[1] * t[1] + s[2] * t[2] + s[3] * t[3];2230break;2231}22322233case V_Pair:2234// t swizzles invalid so the multiply is always zero.2235d[0] = 0;22362237tprefixAdd = VFPU_SWIZZLE(0, 0, 0, 0) | VFPU_NEGATE(0, 0, 0, 0);2238ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);2239ApplySwizzleS(s, V_Quad);2240// It's possible to populate a value by swizzling s[2].2241d[1] = s[2] * t[2];2242break;22432244case V_Single:2245// t swizzles invalid so the multiply is always zero.2246d[0] = 0;2247break;22482249default:2250ERROR_LOG_REPORT(Log::CPU, "vcrsp/vqmul with invalid elements");2251break;2252}22532254// D prefix applies to the last element only (mask and sat) for pair and larger.2255if (sz != V_Single) {2256u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);2257u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);2258currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;2259ApplyPrefixD(d, sz);2260} else {2261// Single always seems to write out zero.2262currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = 0;2263}2264WriteVector(d, sz, vd);2265PC += 4;2266EatPrefixes();2267}22682269void Int_Vlgb(MIPSOpcode op) {2270// Vector log binary (extract exponent)2271FloatBits d, s;2272int vd = _VD;2273int vs = _VS;2274VectorSize sz = GetVecSize(op);22752276ReadVector(s.f, sz, vs);2277ApplySwizzleS(s.f, sz);22782279int exp = (s.u[0] & 0x7F800000) >> 23;2280if (exp == 0xFF) {2281d.f[0] = s.f[0];2282} else if (exp == 0) {2283d.f[0] = -INFINITY;2284} else {2285d.f[0] = (float)(exp - 127);2286}22872288// If sz is greater than V_Single, the rest are copied unchanged.2289for (int i = 1; i < GetNumVectorElements(sz); ++i) {2290d.u[i] = s.u[i];2291}22922293RetainInvalidSwizzleST(d.f, sz);2294ApplyPrefixD(d.f, sz);2295WriteVector(d.f, sz, vd);2296PC += 4;2297EatPrefixes();2298}22992300void Int_Vwbn(MIPSOpcode op) {2301FloatBits d, s;2302int vd = _VD;2303int vs = _VS;2304VectorSize sz = GetVecSize(op);2305u8 exp = (u8)((op >> 16) & 0xFF);23062307ReadVector(s.f, sz, vs);2308ApplySwizzleS(s.f, sz);23092310u32 sigbit = s.u[0] & 0x80000000;2311u32 prevExp = (s.u[0] & 0x7F800000) >> 23;2312u32 mantissa = (s.u[0] & 0x007FFFFF) | 0x00800000;2313if (prevExp != 0xFF && prevExp != 0) {2314if (exp > prevExp) {2315s8 shift = (exp - prevExp) & 0xF;2316mantissa = mantissa >> shift;2317} else {2318s8 shift = (prevExp - exp) & 0xF;2319mantissa = mantissa << shift;2320}2321d.u[0] = sigbit | (mantissa & 0x007FFFFF) | (exp << 23);2322} else {2323d.u[0] = s.u[0] | (exp << 23);2324}23252326// If sz is greater than V_Single, the rest are copied unchanged.2327for (int i = 1; i < GetNumVectorElements(sz); ++i) {2328d.u[i] = s.u[i];2329}23302331RetainInvalidSwizzleST(d.f, sz);2332ApplyPrefixD(d.f, sz);2333WriteVector(d.f, sz, vd);2334PC += 4;2335EatPrefixes();2336}23372338void Int_Vsbn(MIPSOpcode op) {2339FloatBits d, s, t;2340int vd = _VD;2341int vs = _VS;2342int vt = _VT;2343VectorSize sz = GetVecSize(op);23442345ReadVector(s.f, sz, vs);2346ApplySwizzleS(s.f, sz);2347ReadVector(t.f, sz, vt);2348ApplySwizzleT(t.f, sz);2349// Swizzle does apply to the value read as an integer.2350u8 exp = (u8)(127 + t.i[0]);23512352// Simply replace the exponent bits.2353u32 prev = s.u[0] & 0x7F800000;2354if (prev != 0 && prev != 0x7F800000) {2355d.u[0] = (s.u[0] & ~0x7F800000) | (exp << 23);2356} else {2357d.u[0] = s.u[0];2358}23592360// If sz is greater than V_Single, the rest are copied unchanged.2361for (int i = 1; i < GetNumVectorElements(sz); ++i) {2362d.u[i] = s.u[i];2363}23642365ApplyPrefixD(d.f, sz);2366WriteVector(d.f, sz, vd);2367PC += 4;2368EatPrefixes();2369}23702371void Int_Vsbz(MIPSOpcode op) {2372// Vector scale by zero (set exp to 0 to extract mantissa)2373FloatBits d, s;2374int vd = _VD;2375int vs = _VS;2376VectorSize sz = GetVecSize(op);23772378ReadVector(s.f, sz, vs);2379ApplySwizzleS(s.f, sz);23802381// NAN and denormals pass through.2382if (my_isnan(s.f[0]) || (s.u[0] & 0x7F800000) == 0) {2383d.u[0] = s.u[0];2384} else {2385d.u[0] = (127 << 23) | (s.u[0] & 0x007FFFFF);2386}23872388// If sz is greater than V_Single, the rest are copied unchanged.2389for (int i = 1; i < GetNumVectorElements(sz); ++i) {2390d.u[i] = s.u[i];2391}23922393ApplyPrefixD(d.f, sz);2394WriteVector(d.f, sz, vd);2395PC += 4;2396EatPrefixes();2397}2398}239924002401