CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/MIPSVFPUUtils.h
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include <cmath>20#include <string>21#include "Common/CommonTypes.h"22#include "Core/MIPS/MIPS.h"2324#define _VD (op & 0x7F)25#define _VS ((op>>8) & 0x7F)26#define _VT ((op>>16) & 0x7F)2728inline int Xpose(int v) {29return v^0x20;30}3132// Half of PI, or 90 degrees.33#ifndef M_PI_234#define M_PI_2 1.5707963267948966192335#endif3637// The VFPU uses weird angles where 4.0 represents a full circle. This makes it possible to return38// exact 1.0/-1.0 values at certain angles.39//40// The current code attempts to match VFPU sin/cos exactly.41// Possibly affected games:42// Final Fantasy III (#2921 )43// Hitman Reborn 2 (#12900)44// Cho Aniki Zero (#13705)45// Hajime no Ippo (#13671)46// Dissidia Duodecim Final Fantasy (#6710 )47//48// Messing around with the modulo functions? try https://www.desmos.com/calculator.4950extern float vfpu_sin(float);51extern float vfpu_cos(float);52extern void vfpu_sincos(float, float&, float&);5354extern float vfpu_asin(float);5556inline float vfpu_clamp(float v, float min, float max) {57// Note: NAN is preserved, and -0.0 becomes +0.0 if min=+0.0.58return v >= max ? max : (v <= min ? min : v);59}6061float vfpu_dot(const float a[4], const float b[4]);62float vfpu_sqrt(float a);63float vfpu_rsqrt(float a);6465extern float vfpu_exp2(float);66extern float vfpu_rexp2(float);67extern float vfpu_log2(float);68extern float vfpu_rcp(float);6970extern void vrnd_init_default(uint32_t *rcx);71extern void vrnd_init(uint32_t seed, uint32_t *rcx);72extern uint32_t vrnd_generate(uint32_t *rcx);7374inline uint32_t get_uexp(uint32_t x) {75return (x >> 23) & 0xFF;76}7778inline int32_t get_exp(uint32_t x) {79return get_uexp(x) - 127;80}8182inline int32_t get_mant(uint32_t x) {83// Note: this returns the hidden 1.84return (x & 0x007FFFFF) | 0x00800000;85}8687inline int32_t get_sign(uint32_t x) {88return x & 0x80000000;89}9091#define VFPU_FLOAT16_EXP_MAX 0x1f92#define VFPU_SH_FLOAT16_SIGN 1593#define VFPU_MASK_FLOAT16_SIGN 0x194#define VFPU_SH_FLOAT16_EXP 1095#define VFPU_MASK_FLOAT16_EXP 0x1f96#define VFPU_SH_FLOAT16_FRAC 097#define VFPU_MASK_FLOAT16_FRAC 0x3ff9899enum VectorSize {100V_Single = 1,101V_Pair = 2,102V_Triple = 3,103V_Quad = 4,104V_Invalid = -1,105};106107enum MatrixSize {108M_1x1 = 1,109M_2x2 = 2,110M_3x3 = 3,111M_4x4 = 4,112M_Invalid = -1113};114115inline u32 VFPU_SWIZZLE(int x, int y, int z, int w) {116return (x << 0) | (y << 2) | (z << 4) | (w << 6);117}118119inline u32 VFPU_MASK(int x, int y, int z, int w) {120return (x << 0) | (y << 1) | (z << 2) | (w << 3);121}122123inline u32 VFPU_ANY_SWIZZLE() {124return 0x000000FF;125}126127inline u32 VFPU_ABS(int x, int y, int z, int w) {128return VFPU_MASK(x, y, z, w) << 8;129}130131inline u32 VFPU_CONST(int x, int y, int z, int w) {132return VFPU_MASK(x, y, z, w) << 12;133}134135inline u32 VFPU_NEGATE(int x, int y, int z, int w) {136return VFPU_MASK(x, y, z, w) << 16;137}138139enum class VFPUConst {140NONE = -1,141ZERO,142ONE,143TWO,144HALF,145THREE,146THIRD,147FOURTH,148SIXTH,149};150151inline u32 VFPU_MAKE_CONSTANTS(VFPUConst x, VFPUConst y, VFPUConst z, VFPUConst w) {152u32 result = 0;153if (x != VFPUConst::NONE) {154// This sets the constant flag and the swizzle/abs flags for the right constant.155result |= (((int)x & 3) << 0) | (((int)x & 4) << 6) | (1 << 12);156}157if (y != VFPUConst::NONE) {158result |= (((int)y & 3) << 2) | (((int)y & 4) << 7) | (1 << 13);159}160if (z != VFPUConst::NONE) {161result |= (((int)z & 3) << 4) | (((int)z & 4) << 8) | (1 << 14);162}163if (w != VFPUConst::NONE) {164result |= (((int)w & 3) << 6) | (((int)w & 4) << 9) | (1 << 15);165}166return result;167}168169u32 VFPURewritePrefix(int ctrl, u32 remove, u32 add);170171void ReadMatrix(float *rd, MatrixSize size, int reg);172void WriteMatrix(const float *rs, MatrixSize size, int reg);173174void WriteVector(const float *rs, VectorSize N, int reg);175void ReadVector(float *rd, VectorSize N, int reg);176177void GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg);178void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg);179180// Translate between vector and matrix size. Possibly we should simply181// join the two enums, but the type safety is kind of nice.182VectorSize GetVectorSize(MatrixSize sz);183MatrixSize GetMatrixSize(VectorSize sz);184185// Note that if matrix is a transposed matrix (E format), GetColumn will actually return rows,186// and vice versa.187int GetColumnName(int matrix, MatrixSize msize, int column, int offset);188int GetRowName(int matrix, MatrixSize msize, int row, int offset);189190int GetMatrixName(int matrix, MatrixSize msize, int column, int row, bool transposed);191192void GetMatrixColumns(int matrixReg, MatrixSize msize, u8 vecs[4]);193void GetMatrixRows(int matrixReg, MatrixSize msize, u8 vecs[4]);194195enum MatrixOverlapType {196OVERLAP_NONE = 0,197OVERLAP_PARTIAL = 1,198OVERLAP_EQUAL = 2,199// Transposed too? (same space but transposed)200};201202MatrixOverlapType GetMatrixOverlap(int m1, int m2, MatrixSize msize);203204// Returns a number from 0-7, good for checking overlap for 4x4 matrices.205static inline int GetMtx(int matrixReg) {206return (matrixReg >> 2) & 7;207}208209static inline VectorSize GetVecSize(MIPSOpcode op) {210int a = (op >> 7) & 1;211int b = (op >> 14) & 2;212return (VectorSize)(a + b + 1); // Safe, there are no other possibilities213}214215static inline MatrixSize GetMtxSize(MIPSOpcode op) {216int a = (op >> 7) & 1;217int b = (op >> 14) & 2;218return (MatrixSize)(a + b + 1); // Safe, there are no other possibilities219}220221VectorSize GetHalfVectorSizeSafe(VectorSize sz);222VectorSize GetHalfVectorSize(VectorSize sz);223VectorSize GetDoubleVectorSizeSafe(VectorSize sz);224VectorSize GetDoubleVectorSize(VectorSize sz);225VectorSize MatrixVectorSizeSafe(MatrixSize sz);226VectorSize MatrixVectorSize(MatrixSize sz);227228static inline int GetNumVectorElements(VectorSize sz) {229switch (sz) {230case V_Single: return 1;231case V_Pair: return 2;232case V_Triple: return 3;233case V_Quad: return 4;234default: return 0;235}236}237238int GetMatrixSideSafe(MatrixSize sz);239int GetMatrixSide(MatrixSize sz);240std::string GetVectorNotation(int reg, VectorSize size);241std::string GetMatrixNotation(int reg, MatrixSize size);242static inline bool IsMatrixTransposed(int matrixReg) {243return (matrixReg >> 5) & 1;244}245static inline bool IsVectorColumn(int vectorReg) {246return !((vectorReg >> 5) & 1);247}248static inline int TransposeMatrixReg(int matrixReg) {249return matrixReg ^ 0x20;250}251int GetVectorOverlap(int reg1, VectorSize size1, int reg2, VectorSize size2);252253bool GetVFPUCtrlMask(int reg, u32 *mask);254255float Float16ToFloat32(unsigned short l);256void InitVFPU();257258259