Path: blob/master/dep/vixl/src/aarch64/logic-aarch64.cc
4261 views
// Copyright 2015, VIXL authors1// All rights reserved.2//3// Redistribution and use in source and binary forms, with or without4// modification, are permitted provided that the following conditions are met:5//6// * Redistributions of source code must retain the above copyright notice,7// this list of conditions and the following disclaimer.8// * Redistributions in binary form must reproduce the above copyright notice,9// this list of conditions and the following disclaimer in the documentation10// and/or other materials provided with the distribution.11// * Neither the name of ARM Limited nor the names of its contributors may be12// used to endorse or promote products derived from this software without13// specific prior written permission.14//15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND16// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED17// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE18// DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE19// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL20// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR21// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER22// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,23// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE24// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.2526#ifdef VIXL_INCLUDE_SIMULATOR_AARCH642728#include <cmath>2930#include "simulator-aarch64.h"3132namespace vixl {33namespace aarch64 {3435using vixl::internal::SimFloat16;3637template <typename T>38bool IsFloat64() {39return false;40}41template <>42bool IsFloat64<double>() {43return true;44}4546template <typename T>47bool IsFloat32() {48return false;49}50template <>51bool IsFloat32<float>() {52return true;53}5455template <typename T>56bool IsFloat16() {57return false;58}59template <>60bool IsFloat16<Float16>() {61return true;62}63template <>64bool IsFloat16<SimFloat16>() {65return true;66}6768template <>69double Simulator::FPDefaultNaN<double>() {70return kFP64DefaultNaN;71}727374template <>75float Simulator::FPDefaultNaN<float>() {76return kFP32DefaultNaN;77}787980template <>81SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {82return SimFloat16(kFP16DefaultNaN);83}848586double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {87if (src >= 0) {88return UFixedToDouble(src, fbits, round);89} else if (src == INT64_MIN) {90return -UFixedToDouble(src, fbits, round);91} else {92return -UFixedToDouble(-src, fbits, round);93}94}959697double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {98// An input of 0 is a special case because the result is effectively99// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.100if (src == 0) {101return 0.0;102}103104// Calculate the exponent. 
The highest significant bit will have the value105// 2^exponent.106const int highest_significant_bit = 63 - CountLeadingZeros(src);107const int64_t exponent = highest_significant_bit - fbits;108109return FPRoundToDouble(0, exponent, src, round);110}111112113float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {114if (src >= 0) {115return UFixedToFloat(src, fbits, round);116} else if (src == INT64_MIN) {117return -UFixedToFloat(src, fbits, round);118} else {119return -UFixedToFloat(-src, fbits, round);120}121}122123124float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {125// An input of 0 is a special case because the result is effectively126// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.127if (src == 0) {128return 0.0f;129}130131// Calculate the exponent. The highest significant bit will have the value132// 2^exponent.133const int highest_significant_bit = 63 - CountLeadingZeros(src);134const int32_t exponent = highest_significant_bit - fbits;135136return FPRoundToFloat(0, exponent, src, round);137}138139140SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {141if (src >= 0) {142return UFixedToFloat16(src, fbits, round);143} else if (src == INT64_MIN) {144return -UFixedToFloat16(src, fbits, round);145} else {146return -UFixedToFloat16(-src, fbits, round);147}148}149150151SimFloat16 Simulator::UFixedToFloat16(uint64_t src,152int fbits,153FPRounding round) {154// An input of 0 is a special case because the result is effectively155// subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.156if (src == 0) {157return 0.0f;158}159160// Calculate the exponent. 
The highest significant bit will have the value161// 2^exponent.162const int highest_significant_bit = 63 - CountLeadingZeros(src);163const int16_t exponent = highest_significant_bit - fbits;164165return FPRoundToFloat16(0, exponent, src, round);166}167168169uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {170uint64_t rtag = nrand48(rand_state_) >> 28;171VIXL_ASSERT(IsUint4(rtag));172173if (exclude == 0) {174exclude = nrand48(rand_state_) >> 27;175}176177// TODO: implement this to better match the specification, which calls for a178// true random mode, and a pseudo-random mode with state (EL1.TAG) modified by179// PRNG.180return ChooseNonExcludedTag(rtag, 0, exclude);181}182183184bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {185dst.ClearForWrite(vform);186for (int i = 0; i < LaneCountFromFormat(vform); i++) {187if (!LoadLane(dst, vform, i, addr)) {188return false;189}190addr += LaneSizeInBytesFromFormat(vform);191}192return true;193}194195196bool Simulator::ld1(VectorFormat vform,197LogicVRegister dst,198int index,199uint64_t addr) {200dst.ClearForWrite(vform);201return LoadLane(dst, vform, index, addr);202}203204205bool Simulator::ld1r(VectorFormat vform,206VectorFormat unpack_vform,207LogicVRegister dst,208uint64_t addr,209bool is_signed) {210unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);211dst.ClearForWrite(vform);212for (int i = 0; i < LaneCountFromFormat(vform); i++) {213if (is_signed) {214if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {215return false;216}217} else {218if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {219return false;220}221}222}223return true;224}225226227bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {228return ld1r(vform, vform, dst, addr);229}230231232bool Simulator::ld2(VectorFormat vform,233LogicVRegister dst1,234LogicVRegister dst2,235uint64_t addr1) {236dst1.ClearForWrite(vform);237dst2.ClearForWrite(vform);238int esize = 
LaneSizeInBytesFromFormat(vform);239uint64_t addr2 = addr1 + esize;240for (int i = 0; i < LaneCountFromFormat(vform); i++) {241if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {242return false;243}244addr1 += 2 * esize;245addr2 += 2 * esize;246}247return true;248}249250251bool Simulator::ld2(VectorFormat vform,252LogicVRegister dst1,253LogicVRegister dst2,254int index,255uint64_t addr1) {256dst1.ClearForWrite(vform);257dst2.ClearForWrite(vform);258uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);259return (LoadLane(dst1, vform, index, addr1) &&260LoadLane(dst2, vform, index, addr2));261}262263264bool Simulator::ld2r(VectorFormat vform,265LogicVRegister dst1,266LogicVRegister dst2,267uint64_t addr) {268dst1.ClearForWrite(vform);269dst2.ClearForWrite(vform);270uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);271for (int i = 0; i < LaneCountFromFormat(vform); i++) {272if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {273return false;274}275}276return true;277}278279280bool Simulator::ld3(VectorFormat vform,281LogicVRegister dst1,282LogicVRegister dst2,283LogicVRegister dst3,284uint64_t addr1) {285dst1.ClearForWrite(vform);286dst2.ClearForWrite(vform);287dst3.ClearForWrite(vform);288int esize = LaneSizeInBytesFromFormat(vform);289uint64_t addr2 = addr1 + esize;290uint64_t addr3 = addr2 + esize;291for (int i = 0; i < LaneCountFromFormat(vform); i++) {292if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||293!LoadLane(dst3, vform, i, addr3)) {294return false;295}296addr1 += 3 * esize;297addr2 += 3 * esize;298addr3 += 3 * esize;299}300return true;301}302303304bool Simulator::ld3(VectorFormat vform,305LogicVRegister dst1,306LogicVRegister dst2,307LogicVRegister dst3,308int index,309uint64_t addr1) {310dst1.ClearForWrite(vform);311dst2.ClearForWrite(vform);312dst3.ClearForWrite(vform);313uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);314uint64_t addr3 = addr2 + 
LaneSizeInBytesFromFormat(vform);315return (LoadLane(dst1, vform, index, addr1) &&316LoadLane(dst2, vform, index, addr2) &&317LoadLane(dst3, vform, index, addr3));318}319320321bool Simulator::ld3r(VectorFormat vform,322LogicVRegister dst1,323LogicVRegister dst2,324LogicVRegister dst3,325uint64_t addr) {326dst1.ClearForWrite(vform);327dst2.ClearForWrite(vform);328dst3.ClearForWrite(vform);329uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);330uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);331for (int i = 0; i < LaneCountFromFormat(vform); i++) {332if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||333!LoadLane(dst3, vform, i, addr3)) {334return false;335}336}337return true;338}339340341bool Simulator::ld4(VectorFormat vform,342LogicVRegister dst1,343LogicVRegister dst2,344LogicVRegister dst3,345LogicVRegister dst4,346uint64_t addr1) {347dst1.ClearForWrite(vform);348dst2.ClearForWrite(vform);349dst3.ClearForWrite(vform);350dst4.ClearForWrite(vform);351int esize = LaneSizeInBytesFromFormat(vform);352uint64_t addr2 = addr1 + esize;353uint64_t addr3 = addr2 + esize;354uint64_t addr4 = addr3 + esize;355for (int i = 0; i < LaneCountFromFormat(vform); i++) {356if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||357!LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {358return false;359}360addr1 += 4 * esize;361addr2 += 4 * esize;362addr3 += 4 * esize;363addr4 += 4 * esize;364}365return true;366}367368369bool Simulator::ld4(VectorFormat vform,370LogicVRegister dst1,371LogicVRegister dst2,372LogicVRegister dst3,373LogicVRegister dst4,374int index,375uint64_t addr1) {376dst1.ClearForWrite(vform);377dst2.ClearForWrite(vform);378dst3.ClearForWrite(vform);379dst4.ClearForWrite(vform);380uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);381uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);382uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);383return (LoadLane(dst1, vform, 
index, addr1) &&384LoadLane(dst2, vform, index, addr2) &&385LoadLane(dst3, vform, index, addr3) &&386LoadLane(dst4, vform, index, addr4));387}388389390bool Simulator::ld4r(VectorFormat vform,391LogicVRegister dst1,392LogicVRegister dst2,393LogicVRegister dst3,394LogicVRegister dst4,395uint64_t addr) {396dst1.ClearForWrite(vform);397dst2.ClearForWrite(vform);398dst3.ClearForWrite(vform);399dst4.ClearForWrite(vform);400uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);401uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);402uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);403for (int i = 0; i < LaneCountFromFormat(vform); i++) {404if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||405!LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {406return false;407}408}409return true;410}411412413bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {414for (int i = 0; i < LaneCountFromFormat(vform); i++) {415if (!StoreLane(src, vform, i, addr)) return false;416addr += LaneSizeInBytesFromFormat(vform);417}418return true;419}420421422bool Simulator::st1(VectorFormat vform,423LogicVRegister src,424int index,425uint64_t addr) {426return StoreLane(src, vform, index, addr);427}428429430bool Simulator::st2(VectorFormat vform,431LogicVRegister src,432LogicVRegister src2,433uint64_t addr) {434int esize = LaneSizeInBytesFromFormat(vform);435uint64_t addr2 = addr + esize;436for (int i = 0; i < LaneCountFromFormat(vform); i++) {437if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {438return false;439}440addr += 2 * esize;441addr2 += 2 * esize;442}443return true;444}445446447bool Simulator::st2(VectorFormat vform,448LogicVRegister src,449LogicVRegister src2,450int index,451uint64_t addr) {452int esize = LaneSizeInBytesFromFormat(vform);453return (StoreLane(src, vform, index, addr) &&454StoreLane(src2, vform, index, addr + 1 * esize));455}456457458bool Simulator::st3(VectorFormat 
vform,459LogicVRegister src,460LogicVRegister src2,461LogicVRegister src3,462uint64_t addr) {463int esize = LaneSizeInBytesFromFormat(vform);464uint64_t addr2 = addr + esize;465uint64_t addr3 = addr2 + esize;466for (int i = 0; i < LaneCountFromFormat(vform); i++) {467if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||468!StoreLane(src3, vform, i, addr3)) {469return false;470}471addr += 3 * esize;472addr2 += 3 * esize;473addr3 += 3 * esize;474}475return true;476}477478479bool Simulator::st3(VectorFormat vform,480LogicVRegister src,481LogicVRegister src2,482LogicVRegister src3,483int index,484uint64_t addr) {485int esize = LaneSizeInBytesFromFormat(vform);486return (StoreLane(src, vform, index, addr) &&487StoreLane(src2, vform, index, addr + 1 * esize) &&488StoreLane(src3, vform, index, addr + 2 * esize));489}490491492bool Simulator::st4(VectorFormat vform,493LogicVRegister src,494LogicVRegister src2,495LogicVRegister src3,496LogicVRegister src4,497uint64_t addr) {498int esize = LaneSizeInBytesFromFormat(vform);499uint64_t addr2 = addr + esize;500uint64_t addr3 = addr2 + esize;501uint64_t addr4 = addr3 + esize;502for (int i = 0; i < LaneCountFromFormat(vform); i++) {503if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||504!StoreLane(src3, vform, i, addr3) ||505!StoreLane(src4, vform, i, addr4)) {506return false;507}508addr += 4 * esize;509addr2 += 4 * esize;510addr3 += 4 * esize;511addr4 += 4 * esize;512}513return true;514}515516517bool Simulator::st4(VectorFormat vform,518LogicVRegister src,519LogicVRegister src2,520LogicVRegister src3,521LogicVRegister src4,522int index,523uint64_t addr) {524int esize = LaneSizeInBytesFromFormat(vform);525return (StoreLane(src, vform, index, addr) &&526StoreLane(src2, vform, index, addr + 1 * esize) &&527StoreLane(src3, vform, index, addr + 2 * esize) &&528StoreLane(src4, vform, index, addr + 3 * esize));529}530531532LogicVRegister Simulator::cmp(VectorFormat vform,533LogicVRegister 
dst,534const LogicVRegister& src1,535const LogicVRegister& src2,536Condition cond) {537dst.ClearForWrite(vform);538for (int i = 0; i < LaneCountFromFormat(vform); i++) {539int64_t sa = src1.Int(vform, i);540int64_t sb = src2.Int(vform, i);541uint64_t ua = src1.Uint(vform, i);542uint64_t ub = src2.Uint(vform, i);543bool result = false;544switch (cond) {545case eq:546result = (ua == ub);547break;548case ge:549result = (sa >= sb);550break;551case gt:552result = (sa > sb);553break;554case hi:555result = (ua > ub);556break;557case hs:558result = (ua >= ub);559break;560case lt:561result = (sa < sb);562break;563case le:564result = (sa <= sb);565break;566default:567VIXL_UNREACHABLE();568break;569}570dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);571}572return dst;573}574575576LogicVRegister Simulator::cmp(VectorFormat vform,577LogicVRegister dst,578const LogicVRegister& src1,579int imm,580Condition cond) {581SimVRegister temp;582LogicVRegister imm_reg = dup_immediate(vform, temp, imm);583return cmp(vform, dst, src1, imm_reg, cond);584}585586587LogicVRegister Simulator::cmptst(VectorFormat vform,588LogicVRegister dst,589const LogicVRegister& src1,590const LogicVRegister& src2) {591dst.ClearForWrite(vform);592for (int i = 0; i < LaneCountFromFormat(vform); i++) {593uint64_t ua = src1.Uint(vform, i);594uint64_t ub = src2.Uint(vform, i);595dst.SetUint(vform, i, ((ua & ub) != 0) ? 
MaxUintFromFormat(vform) : 0);596}597return dst;598}599600601LogicVRegister Simulator::add(VectorFormat vform,602LogicVRegister dst,603const LogicVRegister& src1,604const LogicVRegister& src2) {605int lane_size = LaneSizeInBitsFromFormat(vform);606dst.ClearForWrite(vform);607608for (int i = 0; i < LaneCountFromFormat(vform); i++) {609// Test for unsigned saturation.610uint64_t ua = src1.UintLeftJustified(vform, i);611uint64_t ub = src2.UintLeftJustified(vform, i);612uint64_t ur = ua + ub;613if (ur < ua) {614dst.SetUnsignedSat(i, true);615}616617// Test for signed saturation.618bool pos_a = (ua >> 63) == 0;619bool pos_b = (ub >> 63) == 0;620bool pos_r = (ur >> 63) == 0;621// If the signs of the operands are the same, but different from the result,622// there was an overflow.623if ((pos_a == pos_b) && (pos_a != pos_r)) {624dst.SetSignedSat(i, pos_a);625}626dst.SetInt(vform, i, ur >> (64 - lane_size));627}628return dst;629}630631LogicVRegister Simulator::add_uint(VectorFormat vform,632LogicVRegister dst,633const LogicVRegister& src1,634uint64_t value) {635int lane_size = LaneSizeInBitsFromFormat(vform);636VIXL_ASSERT(IsUintN(lane_size, value));637dst.ClearForWrite(vform);638// Left-justify `value`.639uint64_t ub = value << (64 - lane_size);640for (int i = 0; i < LaneCountFromFormat(vform); i++) {641// Test for unsigned saturation.642uint64_t ua = src1.UintLeftJustified(vform, i);643uint64_t ur = ua + ub;644if (ur < ua) {645dst.SetUnsignedSat(i, true);646}647648// Test for signed saturation.649// `value` is always positive, so we have an overflow if the (signed) result650// is smaller than the first operand.651if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {652dst.SetSignedSat(i, true);653}654655dst.SetInt(vform, i, ur >> (64 - lane_size));656}657return dst;658}659660LogicVRegister Simulator::addp(VectorFormat vform,661LogicVRegister dst,662const LogicVRegister& src1,663const LogicVRegister& src2) {664SimVRegister temp1, temp2;665uzp1(vform, temp1, src1, 
src2);666uzp2(vform, temp2, src1, src2);667add(vform, dst, temp1, temp2);668if (IsSVEFormat(vform)) {669interleave_top_bottom(vform, dst, dst);670}671return dst;672}673674LogicVRegister Simulator::sdiv(VectorFormat vform,675LogicVRegister dst,676const LogicVRegister& src1,677const LogicVRegister& src2) {678VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));679680for (int i = 0; i < LaneCountFromFormat(vform); i++) {681int64_t val1 = src1.Int(vform, i);682int64_t val2 = src2.Int(vform, i);683int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;684int64_t quotient = 0;685if ((val1 == min_int) && (val2 == -1)) {686quotient = min_int;687} else if (val2 != 0) {688quotient = val1 / val2;689}690dst.SetInt(vform, i, quotient);691}692693return dst;694}695696LogicVRegister Simulator::udiv(VectorFormat vform,697LogicVRegister dst,698const LogicVRegister& src1,699const LogicVRegister& src2) {700VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));701702for (int i = 0; i < LaneCountFromFormat(vform); i++) {703uint64_t val1 = src1.Uint(vform, i);704uint64_t val2 = src2.Uint(vform, i);705uint64_t quotient = 0;706if (val2 != 0) {707quotient = val1 / val2;708}709dst.SetUint(vform, i, quotient);710}711712return dst;713}714715716LogicVRegister Simulator::mla(VectorFormat vform,717LogicVRegister dst,718const LogicVRegister& srca,719const LogicVRegister& src1,720const LogicVRegister& src2) {721SimVRegister temp;722mul(vform, temp, src1, src2);723add(vform, dst, srca, temp);724return dst;725}726727728LogicVRegister Simulator::mls(VectorFormat vform,729LogicVRegister dst,730const LogicVRegister& srca,731const LogicVRegister& src1,732const LogicVRegister& src2) {733SimVRegister temp;734mul(vform, temp, src1, src2);735sub(vform, dst, srca, temp);736return dst;737}738739740LogicVRegister Simulator::mul(VectorFormat vform,741LogicVRegister dst,742const LogicVRegister& src1,743const LogicVRegister& src2) {744dst.ClearForWrite(vform);745746for (int i = 0; i < 
LaneCountFromFormat(vform); i++) {747dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));748}749return dst;750}751752753LogicVRegister Simulator::mul(VectorFormat vform,754LogicVRegister dst,755const LogicVRegister& src1,756const LogicVRegister& src2,757int index) {758SimVRegister temp;759VectorFormat indexform = VectorFormatFillQ(vform);760return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));761}762763764LogicVRegister Simulator::smulh(VectorFormat vform,765LogicVRegister dst,766const LogicVRegister& src1,767const LogicVRegister& src2) {768for (int i = 0; i < LaneCountFromFormat(vform); i++) {769int64_t dst_val = 0xbadbeef;770int64_t val1 = src1.Int(vform, i);771int64_t val2 = src2.Int(vform, i);772switch (LaneSizeInBitsFromFormat(vform)) {773case 8:774dst_val = internal::MultiplyHigh<8>(val1, val2);775break;776case 16:777dst_val = internal::MultiplyHigh<16>(val1, val2);778break;779case 32:780dst_val = internal::MultiplyHigh<32>(val1, val2);781break;782case 64:783dst_val = internal::MultiplyHigh<64>(val1, val2);784break;785default:786VIXL_UNREACHABLE();787break;788}789dst.SetInt(vform, i, dst_val);790}791return dst;792}793794795LogicVRegister Simulator::umulh(VectorFormat vform,796LogicVRegister dst,797const LogicVRegister& src1,798const LogicVRegister& src2) {799for (int i = 0; i < LaneCountFromFormat(vform); i++) {800uint64_t dst_val = 0xbadbeef;801uint64_t val1 = src1.Uint(vform, i);802uint64_t val2 = src2.Uint(vform, i);803switch (LaneSizeInBitsFromFormat(vform)) {804case 8:805dst_val = internal::MultiplyHigh<8>(val1, val2);806break;807case 16:808dst_val = internal::MultiplyHigh<16>(val1, val2);809break;810case 32:811dst_val = internal::MultiplyHigh<32>(val1, val2);812break;813case 64:814dst_val = internal::MultiplyHigh<64>(val1, val2);815break;816default:817VIXL_UNREACHABLE();818break;819}820dst.SetUint(vform, i, dst_val);821}822return dst;823}824825826LogicVRegister Simulator::mla(VectorFormat vform,827LogicVRegister 
dst,828const LogicVRegister& src1,829const LogicVRegister& src2,830int index) {831SimVRegister temp;832VectorFormat indexform = VectorFormatFillQ(vform);833return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));834}835836837LogicVRegister Simulator::mls(VectorFormat vform,838LogicVRegister dst,839const LogicVRegister& src1,840const LogicVRegister& src2,841int index) {842SimVRegister temp;843VectorFormat indexform = VectorFormatFillQ(vform);844return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));845}846847LogicVRegister Simulator::sqdmull(VectorFormat vform,848LogicVRegister dst,849const LogicVRegister& src1,850const LogicVRegister& src2,851int index) {852SimVRegister temp;853VectorFormat indexform =854VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));855return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));856}857858LogicVRegister Simulator::sqdmlal(VectorFormat vform,859LogicVRegister dst,860const LogicVRegister& src1,861const LogicVRegister& src2,862int index) {863SimVRegister temp;864VectorFormat indexform =865VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));866return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));867}868869LogicVRegister Simulator::sqdmlsl(VectorFormat vform,870LogicVRegister dst,871const LogicVRegister& src1,872const LogicVRegister& src2,873int index) {874SimVRegister temp;875VectorFormat indexform =876VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));877return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));878}879880LogicVRegister Simulator::sqdmulh(VectorFormat vform,881LogicVRegister dst,882const LogicVRegister& src1,883const LogicVRegister& src2,884int index) {885SimVRegister temp;886VectorFormat indexform = VectorFormatFillQ(vform);887return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));888}889890891LogicVRegister Simulator::sqrdmulh(VectorFormat vform,892LogicVRegister 
dst,893const LogicVRegister& src1,894const LogicVRegister& src2,895int index) {896SimVRegister temp;897VectorFormat indexform = VectorFormatFillQ(vform);898return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));899}900901902LogicVRegister Simulator::sqrdmlah(VectorFormat vform,903LogicVRegister dst,904const LogicVRegister& src1,905const LogicVRegister& src2,906int index) {907SimVRegister temp;908VectorFormat indexform = VectorFormatFillQ(vform);909return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));910}911912913LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,914LogicVRegister dst,915const LogicVRegister& src1,916const LogicVRegister& src2,917int index) {918SimVRegister temp;919VectorFormat indexform = VectorFormatFillQ(vform);920return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));921}922923uint64_t Simulator::PolynomialMult(uint64_t op1,924uint64_t op2,925int lane_size_in_bits) const {926return PolynomialMult128(op1, op2, lane_size_in_bits).second;927}928929LogicVRegister Simulator::pmul(VectorFormat vform,930LogicVRegister dst,931const LogicVRegister& src1,932const LogicVRegister& src2) {933dst.ClearForWrite(vform);934for (int i = 0; i < LaneCountFromFormat(vform); i++) {935dst.SetUint(vform,936i,937PolynomialMult(src1.Uint(vform, i),938src2.Uint(vform, i),939LaneSizeInBitsFromFormat(vform)));940}941return dst;942}943944945LogicVRegister Simulator::pmull(VectorFormat vform,946LogicVRegister dst,947const LogicVRegister& src1,948const LogicVRegister& src2) {949dst.ClearForWrite(vform);950VectorFormat vform_src = VectorFormatHalfWidth(vform);951952// Process the elements in reverse to avoid problems when the destination953// register is the same as a source.954for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {955dst.SetUint(vform,956i,957PolynomialMult128(src1.Uint(vform_src, i),958src2.Uint(vform_src, i),959LaneSizeInBitsFromFormat(vform_src)));960}961962return 
dst;963}964965966LogicVRegister Simulator::pmull2(VectorFormat vform,967LogicVRegister dst,968const LogicVRegister& src1,969const LogicVRegister& src2) {970dst.ClearForWrite(vform);971VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);972973int lane_count = LaneCountFromFormat(vform);974for (int i = 0; i < lane_count; i++) {975dst.SetUint(vform,976i,977PolynomialMult128(src1.Uint(vform_src, lane_count + i),978src2.Uint(vform_src, lane_count + i),979LaneSizeInBitsFromFormat(vform_src)));980}981982return dst;983}984985986LogicVRegister Simulator::sub(VectorFormat vform,987LogicVRegister dst,988const LogicVRegister& src1,989const LogicVRegister& src2) {990int lane_size = LaneSizeInBitsFromFormat(vform);991dst.ClearForWrite(vform);992for (int i = 0; i < LaneCountFromFormat(vform); i++) {993// Test for unsigned saturation.994uint64_t ua = src1.UintLeftJustified(vform, i);995uint64_t ub = src2.UintLeftJustified(vform, i);996uint64_t ur = ua - ub;997if (ub > ua) {998dst.SetUnsignedSat(i, false);999}10001001// Test for signed saturation.1002bool pos_a = (ua >> 63) == 0;1003bool pos_b = (ub >> 63) == 0;1004bool pos_r = (ur >> 63) == 0;1005// If the signs of the operands are different, and the sign of the first1006// operand doesn't match the result, there was an overflow.1007if ((pos_a != pos_b) && (pos_a != pos_r)) {1008dst.SetSignedSat(i, pos_a);1009}10101011dst.SetInt(vform, i, ur >> (64 - lane_size));1012}1013return dst;1014}10151016LogicVRegister Simulator::sub_uint(VectorFormat vform,1017LogicVRegister dst,1018const LogicVRegister& src1,1019uint64_t value) {1020int lane_size = LaneSizeInBitsFromFormat(vform);1021VIXL_ASSERT(IsUintN(lane_size, value));1022dst.ClearForWrite(vform);1023// Left-justify `value`.1024uint64_t ub = value << (64 - lane_size);1025for (int i = 0; i < LaneCountFromFormat(vform); i++) {1026// Test for unsigned saturation.1027uint64_t ua = src1.UintLeftJustified(vform, i);1028uint64_t ur = ua - ub;1029if (ub > ua) 
{1030dst.SetUnsignedSat(i, false);1031}10321033// Test for signed saturation.1034// `value` is always positive, so we have an overflow if the (signed) result1035// is greater than the first operand.1036if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {1037dst.SetSignedSat(i, false);1038}10391040dst.SetInt(vform, i, ur >> (64 - lane_size));1041}1042return dst;1043}10441045LogicVRegister Simulator::and_(VectorFormat vform,1046LogicVRegister dst,1047const LogicVRegister& src1,1048const LogicVRegister& src2) {1049dst.ClearForWrite(vform);1050for (int i = 0; i < LaneCountFromFormat(vform); i++) {1051dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));1052}1053return dst;1054}105510561057LogicVRegister Simulator::orr(VectorFormat vform,1058LogicVRegister dst,1059const LogicVRegister& src1,1060const LogicVRegister& src2) {1061dst.ClearForWrite(vform);1062for (int i = 0; i < LaneCountFromFormat(vform); i++) {1063dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));1064}1065return dst;1066}106710681069LogicVRegister Simulator::orn(VectorFormat vform,1070LogicVRegister dst,1071const LogicVRegister& src1,1072const LogicVRegister& src2) {1073dst.ClearForWrite(vform);1074for (int i = 0; i < LaneCountFromFormat(vform); i++) {1075dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));1076}1077return dst;1078}107910801081LogicVRegister Simulator::eor(VectorFormat vform,1082LogicVRegister dst,1083const LogicVRegister& src1,1084const LogicVRegister& src2) {1085dst.ClearForWrite(vform);1086for (int i = 0; i < LaneCountFromFormat(vform); i++) {1087dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));1088}1089return dst;1090}109110921093LogicVRegister Simulator::bic(VectorFormat vform,1094LogicVRegister dst,1095const LogicVRegister& src1,1096const LogicVRegister& src2) {1097dst.ClearForWrite(vform);1098for (int i = 0; i < LaneCountFromFormat(vform); i++) {1099dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));1100}1101return 
dst;1102}110311041105LogicVRegister Simulator::bic(VectorFormat vform,1106LogicVRegister dst,1107const LogicVRegister& src,1108uint64_t imm) {1109uint64_t result[16];1110int lane_count = LaneCountFromFormat(vform);1111for (int i = 0; i < lane_count; ++i) {1112result[i] = src.Uint(vform, i) & ~imm;1113}1114dst.ClearForWrite(vform);1115for (int i = 0; i < lane_count; ++i) {1116dst.SetUint(vform, i, result[i]);1117}1118return dst;1119}112011211122LogicVRegister Simulator::bif(VectorFormat vform,1123LogicVRegister dst,1124const LogicVRegister& src1,1125const LogicVRegister& src2) {1126dst.ClearForWrite(vform);1127for (int i = 0; i < LaneCountFromFormat(vform); i++) {1128uint64_t operand1 = dst.Uint(vform, i);1129uint64_t operand2 = ~src2.Uint(vform, i);1130uint64_t operand3 = src1.Uint(vform, i);1131uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);1132dst.SetUint(vform, i, result);1133}1134return dst;1135}113611371138LogicVRegister Simulator::bit(VectorFormat vform,1139LogicVRegister dst,1140const LogicVRegister& src1,1141const LogicVRegister& src2) {1142dst.ClearForWrite(vform);1143for (int i = 0; i < LaneCountFromFormat(vform); i++) {1144uint64_t operand1 = dst.Uint(vform, i);1145uint64_t operand2 = src2.Uint(vform, i);1146uint64_t operand3 = src1.Uint(vform, i);1147uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);1148dst.SetUint(vform, i, result);1149}1150return dst;1151}115211531154LogicVRegister Simulator::bsl(VectorFormat vform,1155LogicVRegister dst,1156const LogicVRegister& src_mask,1157const LogicVRegister& src1,1158const LogicVRegister& src2) {1159dst.ClearForWrite(vform);1160for (int i = 0; i < LaneCountFromFormat(vform); i++) {1161uint64_t operand1 = src2.Uint(vform, i);1162uint64_t operand2 = src_mask.Uint(vform, i);1163uint64_t operand3 = src1.Uint(vform, i);1164uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);1165dst.SetUint(vform, i, result);1166}1167return dst;1168}116911701171LogicVRegister 
Simulator::sminmax(VectorFormat vform,1172LogicVRegister dst,1173const LogicVRegister& src1,1174const LogicVRegister& src2,1175bool max) {1176dst.ClearForWrite(vform);1177for (int i = 0; i < LaneCountFromFormat(vform); i++) {1178int64_t src1_val = src1.Int(vform, i);1179int64_t src2_val = src2.Int(vform, i);1180int64_t dst_val;1181if (max) {1182dst_val = (src1_val > src2_val) ? src1_val : src2_val;1183} else {1184dst_val = (src1_val < src2_val) ? src1_val : src2_val;1185}1186dst.SetInt(vform, i, dst_val);1187}1188return dst;1189}119011911192LogicVRegister Simulator::smax(VectorFormat vform,1193LogicVRegister dst,1194const LogicVRegister& src1,1195const LogicVRegister& src2) {1196return sminmax(vform, dst, src1, src2, true);1197}119811991200LogicVRegister Simulator::smin(VectorFormat vform,1201LogicVRegister dst,1202const LogicVRegister& src1,1203const LogicVRegister& src2) {1204return sminmax(vform, dst, src1, src2, false);1205}120612071208LogicVRegister Simulator::sminmaxp(VectorFormat vform,1209LogicVRegister dst,1210const LogicVRegister& src1,1211const LogicVRegister& src2,1212bool max) {1213unsigned lanes = LaneCountFromFormat(vform);1214int64_t result[kZRegMaxSizeInBytes];1215const LogicVRegister* src = &src1;1216for (unsigned j = 0; j < 2; j++) {1217for (unsigned i = 0; i < lanes; i += 2) {1218int64_t first_val = src->Int(vform, i);1219int64_t second_val = src->Int(vform, i + 1);1220int64_t dst_val;1221if (max) {1222dst_val = (first_val > second_val) ? first_val : second_val;1223} else {1224dst_val = (first_val < second_val) ? 
first_val : second_val;1225}1226VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));1227result[(i >> 1) + (j * lanes / 2)] = dst_val;1228}1229src = &src2;1230}1231dst.SetIntArray(vform, result);1232if (IsSVEFormat(vform)) {1233interleave_top_bottom(vform, dst, dst);1234}1235return dst;1236}123712381239LogicVRegister Simulator::smaxp(VectorFormat vform,1240LogicVRegister dst,1241const LogicVRegister& src1,1242const LogicVRegister& src2) {1243return sminmaxp(vform, dst, src1, src2, true);1244}124512461247LogicVRegister Simulator::sminp(VectorFormat vform,1248LogicVRegister dst,1249const LogicVRegister& src1,1250const LogicVRegister& src2) {1251return sminmaxp(vform, dst, src1, src2, false);1252}125312541255LogicVRegister Simulator::addp(VectorFormat vform,1256LogicVRegister dst,1257const LogicVRegister& src) {1258VIXL_ASSERT(vform == kFormatD);12591260uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);1261dst.ClearForWrite(vform);1262dst.SetUint(vform, 0, dst_val);1263return dst;1264}126512661267LogicVRegister Simulator::addv(VectorFormat vform,1268LogicVRegister dst,1269const LogicVRegister& src) {1270VectorFormat vform_dst =1271ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));127212731274int64_t dst_val = 0;1275for (int i = 0; i < LaneCountFromFormat(vform); i++) {1276dst_val += src.Int(vform, i);1277}12781279dst.ClearForWrite(vform_dst);1280dst.SetInt(vform_dst, 0, dst_val);1281return dst;1282}128312841285LogicVRegister Simulator::saddlv(VectorFormat vform,1286LogicVRegister dst,1287const LogicVRegister& src) {1288VectorFormat vform_dst =1289ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);12901291int64_t dst_val = 0;1292for (int i = 0; i < LaneCountFromFormat(vform); i++) {1293dst_val += src.Int(vform, i);1294}12951296dst.ClearForWrite(vform_dst);1297dst.SetInt(vform_dst, 0, dst_val);1298return dst;1299}130013011302LogicVRegister Simulator::uaddlv(VectorFormat vform,1303LogicVRegister dst,1304const LogicVRegister& 
src) {1305VectorFormat vform_dst =1306ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);13071308uint64_t dst_val = 0;1309for (int i = 0; i < LaneCountFromFormat(vform); i++) {1310dst_val += src.Uint(vform, i);1311}13121313dst.ClearForWrite(vform_dst);1314dst.SetUint(vform_dst, 0, dst_val);1315return dst;1316}131713181319LogicVRegister Simulator::sminmaxv(VectorFormat vform,1320LogicVRegister dst,1321const LogicPRegister& pg,1322const LogicVRegister& src,1323bool max) {1324int64_t dst_val = max ? INT64_MIN : INT64_MAX;1325for (int i = 0; i < LaneCountFromFormat(vform); i++) {1326if (!pg.IsActive(vform, i)) continue;13271328int64_t src_val = src.Int(vform, i);1329if (max) {1330dst_val = (src_val > dst_val) ? src_val : dst_val;1331} else {1332dst_val = (src_val < dst_val) ? src_val : dst_val;1333}1334}1335dst.ClearForWrite(ScalarFormatFromFormat(vform));1336dst.SetInt(vform, 0, dst_val);1337return dst;1338}133913401341LogicVRegister Simulator::smaxv(VectorFormat vform,1342LogicVRegister dst,1343const LogicVRegister& src) {1344sminmaxv(vform, dst, GetPTrue(), src, true);1345return dst;1346}134713481349LogicVRegister Simulator::sminv(VectorFormat vform,1350LogicVRegister dst,1351const LogicVRegister& src) {1352sminmaxv(vform, dst, GetPTrue(), src, false);1353return dst;1354}135513561357LogicVRegister Simulator::smaxv(VectorFormat vform,1358LogicVRegister dst,1359const LogicPRegister& pg,1360const LogicVRegister& src) {1361VIXL_ASSERT(IsSVEFormat(vform));1362sminmaxv(vform, dst, pg, src, true);1363return dst;1364}136513661367LogicVRegister Simulator::sminv(VectorFormat vform,1368LogicVRegister dst,1369const LogicPRegister& pg,1370const LogicVRegister& src) {1371VIXL_ASSERT(IsSVEFormat(vform));1372sminmaxv(vform, dst, pg, src, false);1373return dst;1374}137513761377LogicVRegister Simulator::uminmax(VectorFormat vform,1378LogicVRegister dst,1379const LogicVRegister& src1,1380const LogicVRegister& src2,1381bool max) {1382dst.ClearForWrite(vform);1383for (int i = 
0; i < LaneCountFromFormat(vform); i++) {1384uint64_t src1_val = src1.Uint(vform, i);1385uint64_t src2_val = src2.Uint(vform, i);1386uint64_t dst_val;1387if (max) {1388dst_val = (src1_val > src2_val) ? src1_val : src2_val;1389} else {1390dst_val = (src1_val < src2_val) ? src1_val : src2_val;1391}1392dst.SetUint(vform, i, dst_val);1393}1394return dst;1395}139613971398LogicVRegister Simulator::umax(VectorFormat vform,1399LogicVRegister dst,1400const LogicVRegister& src1,1401const LogicVRegister& src2) {1402return uminmax(vform, dst, src1, src2, true);1403}140414051406LogicVRegister Simulator::umin(VectorFormat vform,1407LogicVRegister dst,1408const LogicVRegister& src1,1409const LogicVRegister& src2) {1410return uminmax(vform, dst, src1, src2, false);1411}141214131414LogicVRegister Simulator::uminmaxp(VectorFormat vform,1415LogicVRegister dst,1416const LogicVRegister& src1,1417const LogicVRegister& src2,1418bool max) {1419unsigned lanes = LaneCountFromFormat(vform);1420uint64_t result[kZRegMaxSizeInBytes];1421const LogicVRegister* src = &src1;1422for (unsigned j = 0; j < 2; j++) {1423for (unsigned i = 0; i < lanes; i += 2) {1424uint64_t first_val = src->Uint(vform, i);1425uint64_t second_val = src->Uint(vform, i + 1);1426uint64_t dst_val;1427if (max) {1428dst_val = (first_val > second_val) ? first_val : second_val;1429} else {1430dst_val = (first_val < second_val) ? 
first_val : second_val;1431}1432VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));1433result[(i >> 1) + (j * lanes / 2)] = dst_val;1434}1435src = &src2;1436}1437dst.SetUintArray(vform, result);1438if (IsSVEFormat(vform)) {1439interleave_top_bottom(vform, dst, dst);1440}1441return dst;1442}144314441445LogicVRegister Simulator::umaxp(VectorFormat vform,1446LogicVRegister dst,1447const LogicVRegister& src1,1448const LogicVRegister& src2) {1449return uminmaxp(vform, dst, src1, src2, true);1450}145114521453LogicVRegister Simulator::uminp(VectorFormat vform,1454LogicVRegister dst,1455const LogicVRegister& src1,1456const LogicVRegister& src2) {1457return uminmaxp(vform, dst, src1, src2, false);1458}145914601461LogicVRegister Simulator::uminmaxv(VectorFormat vform,1462LogicVRegister dst,1463const LogicPRegister& pg,1464const LogicVRegister& src,1465bool max) {1466uint64_t dst_val = max ? 0 : UINT64_MAX;1467for (int i = 0; i < LaneCountFromFormat(vform); i++) {1468if (!pg.IsActive(vform, i)) continue;14691470uint64_t src_val = src.Uint(vform, i);1471if (max) {1472dst_val = (src_val > dst_val) ? src_val : dst_val;1473} else {1474dst_val = (src_val < dst_val) ? 
src_val : dst_val;1475}1476}1477dst.ClearForWrite(ScalarFormatFromFormat(vform));1478dst.SetUint(vform, 0, dst_val);1479return dst;1480}148114821483LogicVRegister Simulator::umaxv(VectorFormat vform,1484LogicVRegister dst,1485const LogicVRegister& src) {1486uminmaxv(vform, dst, GetPTrue(), src, true);1487return dst;1488}148914901491LogicVRegister Simulator::uminv(VectorFormat vform,1492LogicVRegister dst,1493const LogicVRegister& src) {1494uminmaxv(vform, dst, GetPTrue(), src, false);1495return dst;1496}149714981499LogicVRegister Simulator::umaxv(VectorFormat vform,1500LogicVRegister dst,1501const LogicPRegister& pg,1502const LogicVRegister& src) {1503VIXL_ASSERT(IsSVEFormat(vform));1504uminmaxv(vform, dst, pg, src, true);1505return dst;1506}150715081509LogicVRegister Simulator::uminv(VectorFormat vform,1510LogicVRegister dst,1511const LogicPRegister& pg,1512const LogicVRegister& src) {1513VIXL_ASSERT(IsSVEFormat(vform));1514uminmaxv(vform, dst, pg, src, false);1515return dst;1516}151715181519LogicVRegister Simulator::shl(VectorFormat vform,1520LogicVRegister dst,1521const LogicVRegister& src,1522int shift) {1523VIXL_ASSERT(shift >= 0);1524SimVRegister temp;1525LogicVRegister shiftreg = dup_immediate(vform, temp, shift);1526return ushl(vform, dst, src, shiftreg);1527}152815291530LogicVRegister Simulator::sshll(VectorFormat vform,1531LogicVRegister dst,1532const LogicVRegister& src,1533int shift) {1534VIXL_ASSERT(shift >= 0);1535SimVRegister temp1, temp2;1536LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);1537LogicVRegister extendedreg = sxtl(vform, temp2, src);1538return sshl(vform, dst, extendedreg, shiftreg);1539}154015411542LogicVRegister Simulator::sshll2(VectorFormat vform,1543LogicVRegister dst,1544const LogicVRegister& src,1545int shift) {1546VIXL_ASSERT(shift >= 0);1547SimVRegister temp1, temp2;1548LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);1549LogicVRegister extendedreg = sxtl2(vform, temp2, src);1550return sshl(vform, dst, 
extendedreg, shiftreg);1551}155215531554LogicVRegister Simulator::shll(VectorFormat vform,1555LogicVRegister dst,1556const LogicVRegister& src) {1557int shift = LaneSizeInBitsFromFormat(vform) / 2;1558return sshll(vform, dst, src, shift);1559}156015611562LogicVRegister Simulator::shll2(VectorFormat vform,1563LogicVRegister dst,1564const LogicVRegister& src) {1565int shift = LaneSizeInBitsFromFormat(vform) / 2;1566return sshll2(vform, dst, src, shift);1567}156815691570LogicVRegister Simulator::ushll(VectorFormat vform,1571LogicVRegister dst,1572const LogicVRegister& src,1573int shift) {1574VIXL_ASSERT(shift >= 0);1575SimVRegister temp1, temp2;1576LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);1577LogicVRegister extendedreg = uxtl(vform, temp2, src);1578return ushl(vform, dst, extendedreg, shiftreg);1579}158015811582LogicVRegister Simulator::ushll2(VectorFormat vform,1583LogicVRegister dst,1584const LogicVRegister& src,1585int shift) {1586VIXL_ASSERT(shift >= 0);1587SimVRegister temp1, temp2;1588LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);1589LogicVRegister extendedreg = uxtl2(vform, temp2, src);1590return ushl(vform, dst, extendedreg, shiftreg);1591}15921593std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,1594const LogicPRegister& pg,1595const LogicVRegister& src,1596int offset_from_last_active) {1597// Untested for any other values.1598VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));15991600int last_active = GetLastActive(vform, pg);1601int lane_count = LaneCountFromFormat(vform);1602int index =1603((last_active + offset_from_last_active) + lane_count) % lane_count;1604return std::make_pair(last_active >= 0, src.Uint(vform, index));1605}16061607LogicVRegister Simulator::compact(VectorFormat vform,1608LogicVRegister dst,1609const LogicPRegister& pg,1610const LogicVRegister& src) {1611int j = 0;1612for (int i = 0; i < LaneCountFromFormat(vform); i++) {1613if (pg.IsActive(vform, i)) 
{1614dst.SetUint(vform, j++, src.Uint(vform, i));1615}1616}1617for (; j < LaneCountFromFormat(vform); j++) {1618dst.SetUint(vform, j, 0);1619}1620return dst;1621}16221623LogicVRegister Simulator::splice(VectorFormat vform,1624LogicVRegister dst,1625const LogicPRegister& pg,1626const LogicVRegister& src1,1627const LogicVRegister& src2) {1628int lane_count = LaneCountFromFormat(vform);1629int first_active = GetFirstActive(vform, pg);1630int last_active = GetLastActive(vform, pg);1631int dst_idx = 0;1632uint64_t result[kZRegMaxSizeInBytes];16331634if (first_active >= 0) {1635VIXL_ASSERT(last_active >= first_active);1636VIXL_ASSERT(last_active < lane_count);1637for (int i = first_active; i <= last_active; i++) {1638result[dst_idx++] = src1.Uint(vform, i);1639}1640}16411642VIXL_ASSERT(dst_idx <= lane_count);1643for (int i = dst_idx; i < lane_count; i++) {1644result[i] = src2.Uint(vform, i - dst_idx);1645}16461647dst.SetUintArray(vform, result);16481649return dst;1650}16511652LogicVRegister Simulator::sel(VectorFormat vform,1653LogicVRegister dst,1654const SimPRegister& pg,1655const LogicVRegister& src1,1656const LogicVRegister& src2) {1657int p_reg_bits_per_lane =1658LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;1659for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {1660uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)1661? 
src1.Uint(vform, lane)1662: src2.Uint(vform, lane);1663dst.SetUint(vform, lane, lane_value);1664}1665return dst;1666}166716681669LogicPRegister Simulator::sel(LogicPRegister dst,1670const LogicPRegister& pg,1671const LogicPRegister& src1,1672const LogicPRegister& src2) {1673for (int i = 0; i < dst.GetChunkCount(); i++) {1674LogicPRegister::ChunkType mask = pg.GetChunk(i);1675LogicPRegister::ChunkType result =1676(mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));1677dst.SetChunk(i, result);1678}1679return dst;1680}168116821683LogicVRegister Simulator::sli(VectorFormat vform,1684LogicVRegister dst,1685const LogicVRegister& src,1686int shift) {1687dst.ClearForWrite(vform);1688int lane_count = LaneCountFromFormat(vform);1689for (int i = 0; i < lane_count; i++) {1690uint64_t src_lane = src.Uint(vform, i);1691uint64_t dst_lane = dst.Uint(vform, i);1692uint64_t shifted = src_lane << shift;1693uint64_t mask = MaxUintFromFormat(vform) << shift;1694dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);1695}1696return dst;1697}169816991700LogicVRegister Simulator::sqshl(VectorFormat vform,1701LogicVRegister dst,1702const LogicVRegister& src,1703int shift) {1704VIXL_ASSERT(shift >= 0);1705SimVRegister temp;1706LogicVRegister shiftreg = dup_immediate(vform, temp, shift);1707return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);1708}170917101711LogicVRegister Simulator::uqshl(VectorFormat vform,1712LogicVRegister dst,1713const LogicVRegister& src,1714int shift) {1715VIXL_ASSERT(shift >= 0);1716SimVRegister temp;1717LogicVRegister shiftreg = dup_immediate(vform, temp, shift);1718return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);1719}172017211722LogicVRegister Simulator::sqshlu(VectorFormat vform,1723LogicVRegister dst,1724const LogicVRegister& src,1725int shift) {1726VIXL_ASSERT(shift >= 0);1727SimVRegister temp;1728LogicVRegister shiftreg = dup_immediate(vform, temp, shift);1729return sshl(vform, dst, src, 
shiftreg).UnsignedSaturate(vform);1730}173117321733LogicVRegister Simulator::sri(VectorFormat vform,1734LogicVRegister dst,1735const LogicVRegister& src,1736int shift) {1737dst.ClearForWrite(vform);1738int lane_count = LaneCountFromFormat(vform);1739VIXL_ASSERT((shift > 0) &&1740(shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));1741for (int i = 0; i < lane_count; i++) {1742uint64_t src_lane = src.Uint(vform, i);1743uint64_t dst_lane = dst.Uint(vform, i);1744uint64_t shifted;1745uint64_t mask;1746if (shift == 64) {1747shifted = 0;1748mask = 0;1749} else {1750shifted = src_lane >> shift;1751mask = MaxUintFromFormat(vform) >> shift;1752}1753dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);1754}1755return dst;1756}175717581759LogicVRegister Simulator::ushr(VectorFormat vform,1760LogicVRegister dst,1761const LogicVRegister& src,1762int shift) {1763VIXL_ASSERT(shift >= 0);1764SimVRegister temp;1765LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);1766return ushl(vform, dst, src, shiftreg);1767}176817691770LogicVRegister Simulator::sshr(VectorFormat vform,1771LogicVRegister dst,1772const LogicVRegister& src,1773int shift) {1774VIXL_ASSERT(shift >= 0);1775SimVRegister temp;1776LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);1777return sshl(vform, dst, src, shiftreg);1778}177917801781LogicVRegister Simulator::ssra(VectorFormat vform,1782LogicVRegister dst,1783const LogicVRegister& src,1784int shift) {1785SimVRegister temp;1786LogicVRegister shifted_reg = sshr(vform, temp, src, shift);1787return add(vform, dst, dst, shifted_reg);1788}178917901791LogicVRegister Simulator::usra(VectorFormat vform,1792LogicVRegister dst,1793const LogicVRegister& src,1794int shift) {1795SimVRegister temp;1796LogicVRegister shifted_reg = ushr(vform, temp, src, shift);1797return add(vform, dst, dst, shifted_reg);1798}179918001801LogicVRegister Simulator::srsra(VectorFormat vform,1802LogicVRegister dst,1803const LogicVRegister& src,1804int shift) 
{1805SimVRegister temp;1806LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);1807return add(vform, dst, dst, shifted_reg);1808}180918101811LogicVRegister Simulator::ursra(VectorFormat vform,1812LogicVRegister dst,1813const LogicVRegister& src,1814int shift) {1815SimVRegister temp;1816LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);1817return add(vform, dst, dst, shifted_reg);1818}181918201821LogicVRegister Simulator::cls(VectorFormat vform,1822LogicVRegister dst,1823const LogicVRegister& src) {1824int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);1825int lane_count = LaneCountFromFormat(vform);18261827// Ensure that we can store one result per lane.1828int result[kZRegMaxSizeInBytes];18291830for (int i = 0; i < lane_count; i++) {1831result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);1832}18331834dst.ClearForWrite(vform);1835for (int i = 0; i < lane_count; ++i) {1836dst.SetUint(vform, i, result[i]);1837}1838return dst;1839}184018411842LogicVRegister Simulator::clz(VectorFormat vform,1843LogicVRegister dst,1844const LogicVRegister& src) {1845int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);1846int lane_count = LaneCountFromFormat(vform);18471848// Ensure that we can store one result per lane.1849int result[kZRegMaxSizeInBytes];18501851for (int i = 0; i < lane_count; i++) {1852result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);1853}18541855dst.ClearForWrite(vform);1856for (int i = 0; i < lane_count; ++i) {1857dst.SetUint(vform, i, result[i]);1858}1859return dst;1860}186118621863LogicVRegister Simulator::cnot(VectorFormat vform,1864LogicVRegister dst,1865const LogicVRegister& src) {1866dst.ClearForWrite(vform);1867for (int i = 0; i < LaneCountFromFormat(vform); i++) {1868uint64_t value = (src.Uint(vform, i) == 0) ? 
1 : 0;1869dst.SetUint(vform, i, value);1870}1871return dst;1872}187318741875LogicVRegister Simulator::cnt(VectorFormat vform,1876LogicVRegister dst,1877const LogicVRegister& src) {1878int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);1879int lane_count = LaneCountFromFormat(vform);18801881// Ensure that we can store one result per lane.1882int result[kZRegMaxSizeInBytes];18831884for (int i = 0; i < lane_count; i++) {1885result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);1886}18871888dst.ClearForWrite(vform);1889for (int i = 0; i < lane_count; ++i) {1890dst.SetUint(vform, i, result[i]);1891}1892return dst;1893}18941895static int64_t CalculateSignedShiftDistance(int64_t shift_val,1896int esize,1897bool shift_in_ls_byte) {1898if (shift_in_ls_byte) {1899// Neon uses the least-significant byte of the lane as the shift distance.1900shift_val = ExtractSignedBitfield64(7, 0, shift_val);1901} else {1902// SVE uses a saturated shift distance in the range1903// -(esize + 1) ... (esize + 1).1904if (shift_val > (esize + 1)) shift_val = esize + 1;1905if (shift_val < -(esize + 1)) shift_val = -(esize + 1);1906}1907return shift_val;1908}19091910LogicVRegister Simulator::sshl(VectorFormat vform,1911LogicVRegister dst,1912const LogicVRegister& src1,1913const LogicVRegister& src2,1914bool shift_in_ls_byte) {1915dst.ClearForWrite(vform);1916int esize = LaneSizeInBitsFromFormat(vform);1917for (int i = 0; i < LaneCountFromFormat(vform); i++) {1918int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),1919esize,1920shift_in_ls_byte);19211922int64_t lj_src_val = src1.IntLeftJustified(vform, i);19231924// Set signed saturation state.1925if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {1926dst.SetSignedSat(i, lj_src_val >= 0);1927}19281929// Set unsigned saturation state.1930if (lj_src_val < 0) {1931dst.SetUnsignedSat(i, false);1932} else if ((shift_val > CountLeadingZeros(lj_src_val)) &&1933(lj_src_val != 0)) 
{1934dst.SetUnsignedSat(i, true);1935}19361937int64_t src_val = src1.Int(vform, i);1938bool src_is_negative = src_val < 0;1939if (shift_val > 63) {1940dst.SetInt(vform, i, 0);1941} else if (shift_val < -63) {1942dst.SetRounding(i, src_is_negative);1943dst.SetInt(vform, i, src_is_negative ? -1 : 0);1944} else {1945// Use unsigned types for shifts, as behaviour is undefined for signed1946// lhs.1947uint64_t usrc_val = static_cast<uint64_t>(src_val);19481949if (shift_val < 0) {1950// Convert to right shift.1951shift_val = -shift_val;19521953// Set rounding state by testing most-significant bit shifted out.1954// Rounding only needed on right shifts.1955if (((usrc_val >> (shift_val - 1)) & 1) == 1) {1956dst.SetRounding(i, true);1957}19581959usrc_val >>= shift_val;19601961if (src_is_negative) {1962// Simulate sign-extension.1963usrc_val |= (~UINT64_C(0) << (64 - shift_val));1964}1965} else {1966usrc_val <<= shift_val;1967}1968dst.SetUint(vform, i, usrc_val);1969}1970}1971return dst;1972}197319741975LogicVRegister Simulator::ushl(VectorFormat vform,1976LogicVRegister dst,1977const LogicVRegister& src1,1978const LogicVRegister& src2,1979bool shift_in_ls_byte) {1980dst.ClearForWrite(vform);1981int esize = LaneSizeInBitsFromFormat(vform);1982for (int i = 0; i < LaneCountFromFormat(vform); i++) {1983int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),1984esize,1985shift_in_ls_byte);19861987uint64_t lj_src_val = src1.UintLeftJustified(vform, i);19881989// Set saturation state.1990if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {1991dst.SetUnsignedSat(i, true);1992}19931994uint64_t src_val = src1.Uint(vform, i);1995if ((shift_val > 63) || (shift_val < -64)) {1996dst.SetUint(vform, i, 0);1997} else {1998if (shift_val < 0) {1999// Set rounding state. 
Rounding only needed on right shifts.2000if (((src_val >> (-shift_val - 1)) & 1) == 1) {2001dst.SetRounding(i, true);2002}20032004if (shift_val == -64) {2005src_val = 0;2006} else {2007src_val >>= -shift_val;2008}2009} else {2010src_val <<= shift_val;2011}2012dst.SetUint(vform, i, src_val);2013}2014}2015return dst;2016}20172018LogicVRegister Simulator::sshr(VectorFormat vform,2019LogicVRegister dst,2020const LogicVRegister& src1,2021const LogicVRegister& src2) {2022SimVRegister temp;2023// Saturate to sidestep the min-int problem.2024neg(vform, temp, src2).SignedSaturate(vform);2025sshl(vform, dst, src1, temp, false);2026return dst;2027}20282029LogicVRegister Simulator::ushr(VectorFormat vform,2030LogicVRegister dst,2031const LogicVRegister& src1,2032const LogicVRegister& src2) {2033SimVRegister temp;2034// Saturate to sidestep the min-int problem.2035neg(vform, temp, src2).SignedSaturate(vform);2036ushl(vform, dst, src1, temp, false);2037return dst;2038}20392040LogicVRegister Simulator::neg(VectorFormat vform,2041LogicVRegister dst,2042const LogicVRegister& src) {2043dst.ClearForWrite(vform);2044for (int i = 0; i < LaneCountFromFormat(vform); i++) {2045// Test for signed saturation.2046int64_t sa = src.Int(vform, i);2047if (sa == MinIntFromFormat(vform)) {2048dst.SetSignedSat(i, true);2049}2050dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa);2051}2052return dst;2053}205420552056LogicVRegister Simulator::suqadd(VectorFormat vform,2057LogicVRegister dst,2058const LogicVRegister& src1,2059const LogicVRegister& src2) {2060dst.ClearForWrite(vform);2061for (int i = 0; i < LaneCountFromFormat(vform); i++) {2062int64_t sa = src1.IntLeftJustified(vform, i);2063uint64_t ub = src2.UintLeftJustified(vform, i);2064uint64_t ur = sa + ub;20652066int64_t sr;2067memcpy(&sr, &ur, sizeof(sr));2068if (sr < sa) { // Test for signed positive saturation.2069dst.SetInt(vform, i, MaxIntFromFormat(vform));2070} else {2071dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));2072}2073}2074return dst;2075}207620772078LogicVRegister Simulator::usqadd(VectorFormat vform,2079LogicVRegister dst,2080const LogicVRegister& src1,2081const LogicVRegister& src2) {2082dst.ClearForWrite(vform);2083for (int i = 0; i < LaneCountFromFormat(vform); i++) {2084uint64_t ua = src1.UintLeftJustified(vform, i);2085int64_t sb = src2.IntLeftJustified(vform, i);2086uint64_t ur = ua + sb;20872088if ((sb > 0) && (ur <= ua)) {2089dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.2090} else if ((sb < 0) && (ur >= ua)) {2091dst.SetUint(vform, i, 0); // Negative saturation.2092} else {2093dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));2094}2095}2096return dst;2097}209820992100LogicVRegister Simulator::abs(VectorFormat vform,2101LogicVRegister dst,2102const LogicVRegister& src) {2103dst.ClearForWrite(vform);2104for (int i = 0; i < LaneCountFromFormat(vform); i++) {2105// Test for signed saturation.2106int64_t sa = src.Int(vform, i);2107if (sa == MinIntFromFormat(vform)) {2108dst.SetSignedSat(i, true);2109}2110if (sa < 0) {2111dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa);2112} else {2113dst.SetInt(vform, i, sa);2114}2115}2116return dst;2117}211821192120LogicVRegister Simulator::andv(VectorFormat vform,2121LogicVRegister dst,2122const LogicPRegister& pg,2123const LogicVRegister& src) {2124VIXL_ASSERT(IsSVEFormat(vform));2125uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));2126for (int i = 0; i < LaneCountFromFormat(vform); i++) {2127if (!pg.IsActive(vform, i)) continue;21282129result &= src.Uint(vform, i);2130}2131VectorFormat vform_dst =2132ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));2133dst.ClearForWrite(vform_dst);2134dst.SetUint(vform_dst, 0, result);2135return dst;2136}213721382139LogicVRegister Simulator::eorv(VectorFormat vform,2140LogicVRegister dst,2141const LogicPRegister& pg,2142const LogicVRegister& src) {2143VIXL_ASSERT(IsSVEFormat(vform));2144uint64_t result = 0;2145for (int i = 0; i < LaneCountFromFormat(vform); i++) {2146if (!pg.IsActive(vform, i)) continue;21472148result ^= src.Uint(vform, i);2149}2150VectorFormat vform_dst =2151ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));2152dst.ClearForWrite(vform_dst);2153dst.SetUint(vform_dst, 0, result);2154return dst;2155}215621572158LogicVRegister Simulator::orv(VectorFormat vform,2159LogicVRegister dst,2160const LogicPRegister& pg,2161const LogicVRegister& src) {2162VIXL_ASSERT(IsSVEFormat(vform));2163uint64_t result = 0;2164for (int i = 0; i < LaneCountFromFormat(vform); i++) {2165if (!pg.IsActive(vform, i)) continue;21662167result |= src.Uint(vform, i);2168}2169VectorFormat vform_dst =2170ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));2171dst.ClearForWrite(vform_dst);2172dst.SetUint(vform_dst, 0, result);2173return dst;2174}217521762177LogicVRegister Simulator::saddv(VectorFormat vform,2178LogicVRegister dst,2179const LogicPRegister& pg,2180const LogicVRegister& src) {2181VIXL_ASSERT(IsSVEFormat(vform));2182VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);2183int64_t result = 0;2184for (int i = 0; i < 
LaneCountFromFormat(vform); i++) {2185if (!pg.IsActive(vform, i)) continue;21862187// The destination register always has D-lane sizes and the source register2188// always has S-lanes or smaller, so signed integer overflow -- undefined2189// behaviour -- can't occur.2190result += src.Int(vform, i);2191}21922193dst.ClearForWrite(kFormatD);2194dst.SetInt(kFormatD, 0, result);2195return dst;2196}219721982199LogicVRegister Simulator::uaddv(VectorFormat vform,2200LogicVRegister dst,2201const LogicPRegister& pg,2202const LogicVRegister& src) {2203VIXL_ASSERT(IsSVEFormat(vform));2204uint64_t result = 0;2205for (int i = 0; i < LaneCountFromFormat(vform); i++) {2206if (!pg.IsActive(vform, i)) continue;22072208result += src.Uint(vform, i);2209}22102211dst.ClearForWrite(kFormatD);2212dst.SetUint(kFormatD, 0, result);2213return dst;2214}221522162217LogicVRegister Simulator::extractnarrow(VectorFormat dstform,2218LogicVRegister dst,2219bool dst_is_signed,2220const LogicVRegister& src,2221bool src_is_signed) {2222bool upperhalf = false;2223VectorFormat srcform = dstform;2224if ((dstform == kFormat16B) || (dstform == kFormat8H) ||2225(dstform == kFormat4S)) {2226upperhalf = true;2227srcform = VectorFormatHalfLanes(srcform);2228}2229srcform = VectorFormatDoubleWidth(srcform);22302231LogicVRegister src_copy = src;22322233int offset;2234if (upperhalf) {2235offset = LaneCountFromFormat(dstform) / 2;2236} else {2237offset = 0;2238}22392240for (int i = 0; i < LaneCountFromFormat(srcform); i++) {2241int64_t ssrc = src_copy.Int(srcform, i);2242uint64_t usrc = src_copy.Uint(srcform, i);22432244// Test for signed saturation2245if (ssrc > MaxIntFromFormat(dstform)) {2246dst.SetSignedSat(offset + i, true);2247} else if (ssrc < MinIntFromFormat(dstform)) {2248dst.SetSignedSat(offset + i, false);2249}22502251// Test for unsigned saturation2252if (src_is_signed) {2253if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {2254dst.SetUnsignedSat(offset + i, true);2255} else if (ssrc < 0) 
{2256dst.SetUnsignedSat(offset + i, false);2257}2258} else {2259if (usrc > MaxUintFromFormat(dstform)) {2260dst.SetUnsignedSat(offset + i, true);2261}2262}22632264int64_t result;2265if (src_is_signed) {2266result = ssrc & MaxUintFromFormat(dstform);2267} else {2268result = usrc & MaxUintFromFormat(dstform);2269}22702271if (dst_is_signed) {2272dst.SetInt(dstform, offset + i, result);2273} else {2274dst.SetUint(dstform, offset + i, result);2275}2276}22772278if (upperhalf) {2279// Clear any bits beyond a Q register.2280dst.ClearForWrite(kFormat16B);2281} else {2282dst.ClearForWrite(dstform);2283}2284return dst;2285}228622872288LogicVRegister Simulator::xtn(VectorFormat vform,2289LogicVRegister dst,2290const LogicVRegister& src) {2291return extractnarrow(vform, dst, true, src, true);2292}229322942295LogicVRegister Simulator::sqxtn(VectorFormat vform,2296LogicVRegister dst,2297const LogicVRegister& src) {2298return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);2299}230023012302LogicVRegister Simulator::sqxtun(VectorFormat vform,2303LogicVRegister dst,2304const LogicVRegister& src) {2305return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);2306}230723082309LogicVRegister Simulator::uqxtn(VectorFormat vform,2310LogicVRegister dst,2311const LogicVRegister& src) {2312return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);2313}231423152316LogicVRegister Simulator::absdiff(VectorFormat vform,2317LogicVRegister dst,2318const LogicVRegister& src1,2319const LogicVRegister& src2,2320bool is_signed) {2321dst.ClearForWrite(vform);2322for (int i = 0; i < LaneCountFromFormat(vform); i++) {2323bool src1_gt_src2 = is_signed ? 
(src1.Int(vform, i) > src2.Int(vform, i))2324: (src1.Uint(vform, i) > src2.Uint(vform, i));2325// Always calculate the answer using unsigned arithmetic, to avoid2326// implementation-defined signed overflow.2327if (src1_gt_src2) {2328dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));2329} else {2330dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));2331}2332}2333return dst;2334}233523362337LogicVRegister Simulator::saba(VectorFormat vform,2338LogicVRegister dst,2339const LogicVRegister& src1,2340const LogicVRegister& src2) {2341SimVRegister temp;2342dst.ClearForWrite(vform);2343absdiff(vform, temp, src1, src2, true);2344add(vform, dst, dst, temp);2345return dst;2346}234723482349LogicVRegister Simulator::uaba(VectorFormat vform,2350LogicVRegister dst,2351const LogicVRegister& src1,2352const LogicVRegister& src2) {2353SimVRegister temp;2354dst.ClearForWrite(vform);2355absdiff(vform, temp, src1, src2, false);2356add(vform, dst, dst, temp);2357return dst;2358}235923602361LogicVRegister Simulator::not_(VectorFormat vform,2362LogicVRegister dst,2363const LogicVRegister& src) {2364dst.ClearForWrite(vform);2365for (int i = 0; i < LaneCountFromFormat(vform); i++) {2366dst.SetUint(vform, i, ~src.Uint(vform, i));2367}2368return dst;2369}237023712372LogicVRegister Simulator::rbit(VectorFormat vform,2373LogicVRegister dst,2374const LogicVRegister& src) {2375uint64_t result[kZRegMaxSizeInBytes];2376int lane_count = LaneCountFromFormat(vform);2377int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);2378uint64_t reversed_value;2379uint64_t value;2380for (int i = 0; i < lane_count; i++) {2381value = src.Uint(vform, i);2382reversed_value = 0;2383for (int j = 0; j < lane_size_in_bits; j++) {2384reversed_value = (reversed_value << 1) | (value & 1);2385value >>= 1;2386}2387result[i] = reversed_value;2388}23892390dst.ClearForWrite(vform);2391for (int i = 0; i < lane_count; ++i) {2392dst.SetUint(vform, i, result[i]);2393}2394return 
dst;2395}239623972398LogicVRegister Simulator::rev(VectorFormat vform,2399LogicVRegister dst,2400const LogicVRegister& src) {2401VIXL_ASSERT(IsSVEFormat(vform));2402int lane_count = LaneCountFromFormat(vform);2403for (int i = 0; i < lane_count / 2; i++) {2404uint64_t t = src.Uint(vform, i);2405dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));2406dst.SetUint(vform, lane_count - i - 1, t);2407}2408return dst;2409}241024112412LogicVRegister Simulator::rev_byte(VectorFormat vform,2413LogicVRegister dst,2414const LogicVRegister& src,2415int rev_size) {2416uint64_t result[kZRegMaxSizeInBytes] = {};2417int lane_count = LaneCountFromFormat(vform);2418int lane_size = LaneSizeInBytesFromFormat(vform);2419int lanes_per_loop = rev_size / lane_size;2420for (int i = 0; i < lane_count; i += lanes_per_loop) {2421for (int j = 0; j < lanes_per_loop; j++) {2422result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);2423}2424}2425dst.ClearForWrite(vform);2426for (int i = 0; i < lane_count; ++i) {2427dst.SetUint(vform, i, result[i]);2428}2429return dst;2430}243124322433LogicVRegister Simulator::rev16(VectorFormat vform,2434LogicVRegister dst,2435const LogicVRegister& src) {2436return rev_byte(vform, dst, src, 2);2437}243824392440LogicVRegister Simulator::rev32(VectorFormat vform,2441LogicVRegister dst,2442const LogicVRegister& src) {2443return rev_byte(vform, dst, src, 4);2444}244524462447LogicVRegister Simulator::rev64(VectorFormat vform,2448LogicVRegister dst,2449const LogicVRegister& src) {2450return rev_byte(vform, dst, src, 8);2451}24522453LogicVRegister Simulator::addlp(VectorFormat vform,2454LogicVRegister dst,2455const LogicVRegister& src,2456bool is_signed,2457bool do_accumulate) {2458VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);2459VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);24602461uint64_t result[kZRegMaxSizeInBytes];2462int lane_count = LaneCountFromFormat(vform);2463for (int i = 0; i < lane_count; i++) {2464if (is_signed) 
{2465result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +2466src.Int(vformsrc, 2 * i + 1));2467} else {2468result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);2469}2470}24712472dst.ClearForWrite(vform);2473for (int i = 0; i < lane_count; ++i) {2474if (do_accumulate) {2475result[i] += dst.Uint(vform, i);2476}2477dst.SetUint(vform, i, result[i]);2478}24792480return dst;2481}248224832484LogicVRegister Simulator::saddlp(VectorFormat vform,2485LogicVRegister dst,2486const LogicVRegister& src) {2487return addlp(vform, dst, src, true, false);2488}248924902491LogicVRegister Simulator::uaddlp(VectorFormat vform,2492LogicVRegister dst,2493const LogicVRegister& src) {2494return addlp(vform, dst, src, false, false);2495}249624972498LogicVRegister Simulator::sadalp(VectorFormat vform,2499LogicVRegister dst,2500const LogicVRegister& src) {2501return addlp(vform, dst, src, true, true);2502}250325042505LogicVRegister Simulator::uadalp(VectorFormat vform,2506LogicVRegister dst,2507const LogicVRegister& src) {2508return addlp(vform, dst, src, false, true);2509}25102511LogicVRegister Simulator::ror(VectorFormat vform,2512LogicVRegister dst,2513const LogicVRegister& src,2514int rotation) {2515int width = LaneSizeInBitsFromFormat(vform);2516for (int i = 0; i < LaneCountFromFormat(vform); i++) {2517uint64_t value = src.Uint(vform, i);2518dst.SetUint(vform, i, RotateRight(value, rotation, width));2519}2520return dst;2521}25222523LogicVRegister Simulator::ext(VectorFormat vform,2524LogicVRegister dst,2525const LogicVRegister& src1,2526const LogicVRegister& src2,2527int index) {2528uint8_t result[kZRegMaxSizeInBytes] = {};2529int lane_count = LaneCountFromFormat(vform);2530for (int i = 0; i < lane_count - index; ++i) {2531result[i] = src1.Uint(vform, i + index);2532}2533for (int i = 0; i < index; ++i) {2534result[lane_count - index + i] = src2.Uint(vform, i);2535}2536dst.ClearForWrite(vform);2537for (int i = 0; i < lane_count; ++i) {2538dst.SetUint(vform, i, 
result[i]);2539}2540return dst;2541}25422543LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,2544LogicVRegister dst,2545const LogicVRegister& src,2546int index) {2547if (index < 0) index += LaneCountFromFormat(vform);2548VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));2549index *= LaneSizeInBytesFromFormat(vform);2550return ext(kFormatVnB, dst, src, src, index);2551}255225532554template <typename T>2555LogicVRegister Simulator::fadda(VectorFormat vform,2556LogicVRegister acc,2557const LogicPRegister& pg,2558const LogicVRegister& src) {2559T result = acc.Float<T>(0);2560for (int i = 0; i < LaneCountFromFormat(vform); i++) {2561if (!pg.IsActive(vform, i)) continue;25622563result = FPAdd(result, src.Float<T>(i));2564}2565VectorFormat vform_dst =2566ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));2567acc.ClearForWrite(vform_dst);2568acc.SetFloat(0, result);2569return acc;2570}25712572LogicVRegister Simulator::fadda(VectorFormat vform,2573LogicVRegister acc,2574const LogicPRegister& pg,2575const LogicVRegister& src) {2576switch (LaneSizeInBitsFromFormat(vform)) {2577case kHRegSize:2578fadda<SimFloat16>(vform, acc, pg, src);2579break;2580case kSRegSize:2581fadda<float>(vform, acc, pg, src);2582break;2583case kDRegSize:2584fadda<double>(vform, acc, pg, src);2585break;2586default:2587VIXL_UNREACHABLE();2588}2589return acc;2590}25912592template <typename T>2593LogicVRegister Simulator::fcadd(VectorFormat vform,2594LogicVRegister dst, // d2595const LogicVRegister& src1, // n2596const LogicVRegister& src2, // m2597int rot) {2598int elements = LaneCountFromFormat(vform);25992600T element1, element3;2601rot = (rot == 1) ? 
270 : 90;26022603// Loop example:2604// 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)2605// 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)26062607for (int e = 0; e <= (elements / 2) - 1; e++) {2608switch (rot) {2609case 90:2610element1 = FPNeg(src2.Float<T>(e * 2 + 1));2611element3 = src2.Float<T>(e * 2);2612break;2613case 270:2614element1 = src2.Float<T>(e * 2 + 1);2615element3 = FPNeg(src2.Float<T>(e * 2));2616break;2617default:2618VIXL_UNREACHABLE();2619return dst; // prevents "element(n) may be unintialized" errors2620}2621dst.ClearForWrite(vform);2622dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));2623dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));2624}2625return dst;2626}262726282629LogicVRegister Simulator::fcadd(VectorFormat vform,2630LogicVRegister dst, // d2631const LogicVRegister& src1, // n2632const LogicVRegister& src2, // m2633int rot) {2634if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {2635fcadd<SimFloat16>(vform, dst, src1, src2, rot);2636} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {2637fcadd<float>(vform, dst, src1, src2, rot);2638} else {2639VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);2640fcadd<double>(vform, dst, src1, src2, rot);2641}2642return dst;2643}26442645template <typename T>2646LogicVRegister Simulator::fcmla(VectorFormat vform,2647LogicVRegister dst,2648const LogicVRegister& src1,2649const LogicVRegister& src2,2650const LogicVRegister& acc,2651int index,2652int rot) {2653int elements = LaneCountFromFormat(vform);26542655T element1, element2, element3, element4;2656rot *= 90;26572658// Loop example:2659// 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)2660// 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)26612662for (int e = 0; e <= (elements / 2) - 1; e++) {2663// Index == -1 indicates a vector/vector rather than vector/indexed-element2664// operation.2665int f = (index < 0) ? 
e : index;26662667switch (rot) {2668case 0:2669element1 = src2.Float<T>(f * 2);2670element2 = src1.Float<T>(e * 2);2671element3 = src2.Float<T>(f * 2 + 1);2672element4 = src1.Float<T>(e * 2);2673break;2674case 90:2675element1 = FPNeg(src2.Float<T>(f * 2 + 1));2676element2 = src1.Float<T>(e * 2 + 1);2677element3 = src2.Float<T>(f * 2);2678element4 = src1.Float<T>(e * 2 + 1);2679break;2680case 180:2681element1 = FPNeg(src2.Float<T>(f * 2));2682element2 = src1.Float<T>(e * 2);2683element3 = FPNeg(src2.Float<T>(f * 2 + 1));2684element4 = src1.Float<T>(e * 2);2685break;2686case 270:2687element1 = src2.Float<T>(f * 2 + 1);2688element2 = src1.Float<T>(e * 2 + 1);2689element3 = FPNeg(src2.Float<T>(f * 2));2690element4 = src1.Float<T>(e * 2 + 1);2691break;2692default:2693VIXL_UNREACHABLE();2694return dst; // prevents "element(n) may be unintialized" errors2695}2696dst.ClearForWrite(vform);2697dst.SetFloat<T>(vform,2698e * 2,2699FPMulAdd(acc.Float<T>(e * 2), element2, element1));2700dst.SetFloat<T>(vform,2701e * 2 + 1,2702FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));2703}2704return dst;2705}27062707LogicVRegister Simulator::fcmla(VectorFormat vform,2708LogicVRegister dst,2709const LogicVRegister& src1,2710const LogicVRegister& src2,2711const LogicVRegister& acc,2712int rot) {2713if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {2714fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);2715} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {2716fcmla<float>(vform, dst, src1, src2, acc, -1, rot);2717} else {2718fcmla<double>(vform, dst, src1, src2, acc, -1, rot);2719}2720return dst;2721}272227232724LogicVRegister Simulator::fcmla(VectorFormat vform,2725LogicVRegister dst, // d2726const LogicVRegister& src1, // n2727const LogicVRegister& src2, // m2728int index,2729int rot) {2730if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {2731fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);2732} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) 
{2733fcmla<float>(vform, dst, src1, src2, dst, index, rot);2734} else {2735fcmla<double>(vform, dst, src1, src2, dst, index, rot);2736}2737return dst;2738}27392740LogicVRegister Simulator::cadd(VectorFormat vform,2741LogicVRegister dst,2742const LogicVRegister& src1,2743const LogicVRegister& src2,2744int rot,2745bool saturate) {2746SimVRegister src1_r, src1_i;2747SimVRegister src2_r, src2_i;2748SimVRegister zero;2749zero.Clear();2750uzp1(vform, src1_r, src1, zero);2751uzp2(vform, src1_i, src1, zero);2752uzp1(vform, src2_r, src2, zero);2753uzp2(vform, src2_i, src2, zero);27542755if (rot == 90) {2756if (saturate) {2757sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);2758add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);2759} else {2760sub(vform, src1_r, src1_r, src2_i);2761add(vform, src1_i, src1_i, src2_r);2762}2763} else {2764VIXL_ASSERT(rot == 270);2765if (saturate) {2766add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);2767sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);2768} else {2769add(vform, src1_r, src1_r, src2_i);2770sub(vform, src1_i, src1_i, src2_r);2771}2772}27732774zip1(vform, dst, src1_r, src1_i);2775return dst;2776}27772778LogicVRegister Simulator::cmla(VectorFormat vform,2779LogicVRegister dst,2780const LogicVRegister& srca,2781const LogicVRegister& src1,2782const LogicVRegister& src2,2783int rot) {2784SimVRegister src1_a;2785SimVRegister src2_a, src2_b;2786SimVRegister srca_i, srca_r;2787SimVRegister zero, temp;2788zero.Clear();27892790if ((rot == 0) || (rot == 180)) {2791uzp1(vform, src1_a, src1, zero);2792uzp1(vform, src2_a, src2, zero);2793uzp2(vform, src2_b, src2, zero);2794} else {2795uzp2(vform, src1_a, src1, zero);2796uzp2(vform, src2_a, src2, zero);2797uzp1(vform, src2_b, src2, zero);2798}27992800uzp1(vform, srca_r, srca, zero);2801uzp2(vform, srca_i, srca, zero);28022803bool sub_r = (rot == 90) || (rot == 180);2804bool sub_i = (rot == 180) || (rot == 270);28052806mul(vform, temp, src1_a, src2_a);2807if 
(sub_r) {2808sub(vform, srca_r, srca_r, temp);2809} else {2810add(vform, srca_r, srca_r, temp);2811}28122813mul(vform, temp, src1_a, src2_b);2814if (sub_i) {2815sub(vform, srca_i, srca_i, temp);2816} else {2817add(vform, srca_i, srca_i, temp);2818}28192820zip1(vform, dst, srca_r, srca_i);2821return dst;2822}28232824LogicVRegister Simulator::cmla(VectorFormat vform,2825LogicVRegister dst,2826const LogicVRegister& srca,2827const LogicVRegister& src1,2828const LogicVRegister& src2,2829int index,2830int rot) {2831SimVRegister temp;2832dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);2833return cmla(vform, dst, srca, src1, temp, rot);2834}28352836LogicVRegister Simulator::bgrp(VectorFormat vform,2837LogicVRegister dst,2838const LogicVRegister& src1,2839const LogicVRegister& src2,2840bool do_bext) {2841for (int i = 0; i < LaneCountFromFormat(vform); i++) {2842uint64_t value = src1.Uint(vform, i);2843uint64_t mask = src2.Uint(vform, i);2844int high_pos = 0;2845int low_pos = 0;2846uint64_t result_high = 0;2847uint64_t result_low = 0;2848for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {2849if ((mask & 1) == 0) {2850result_high |= (value & 1) << high_pos;2851high_pos++;2852} else {2853result_low |= (value & 1) << low_pos;2854low_pos++;2855}2856mask >>= 1;2857value >>= 1;2858}28592860if (!do_bext) {2861result_low |= result_high << low_pos;2862}28632864dst.SetUint(vform, i, result_low);2865}2866return dst;2867}28682869LogicVRegister Simulator::bdep(VectorFormat vform,2870LogicVRegister dst,2871const LogicVRegister& src1,2872const LogicVRegister& src2) {2873for (int i = 0; i < LaneCountFromFormat(vform); i++) {2874uint64_t value = src1.Uint(vform, i);2875uint64_t mask = src2.Uint(vform, i);2876uint64_t result = 0;2877for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {2878if ((mask & 1) == 1) {2879result |= (value & 1) << j;2880value >>= 1;2881}2882mask >>= 1;2883}2884dst.SetUint(vform, i, result);2885}2886return 
dst;2887}28882889LogicVRegister Simulator::histogram(VectorFormat vform,2890LogicVRegister dst,2891const LogicPRegister& pg,2892const LogicVRegister& src1,2893const LogicVRegister& src2,2894bool do_segmented) {2895int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);2896uint64_t result[kZRegMaxSizeInBytes];28972898for (int i = 0; i < LaneCountFromFormat(vform); i++) {2899uint64_t count = 0;2900uint64_t value = src1.Uint(vform, i);29012902int segment = do_segmented ? (i / elements_per_segment) : 0;2903int segment_offset = segment * elements_per_segment;2904int hist_limit = do_segmented ? elements_per_segment : (i + 1);2905for (int j = 0; j < hist_limit; j++) {2906if (pg.IsActive(vform, j) &&2907(value == src2.Uint(vform, j + segment_offset))) {2908count++;2909}2910}2911result[i] = count;2912}2913dst.SetUintArray(vform, result);2914return dst;2915}29162917LogicVRegister Simulator::dup_element(VectorFormat vform,2918LogicVRegister dst,2919const LogicVRegister& src,2920int src_index) {2921if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {2922// When duplicating an element larger than 64 bits, split the element into2923// 64-bit parts, and duplicate the parts across the destination.2924uint64_t d[4];2925int count = (vform == kFormatVnQ) ? 
2 : 4;2926for (int i = 0; i < count; i++) {2927d[i] = src.Uint(kFormatVnD, (src_index * count) + i);2928}2929dst.Clear();2930for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {2931dst.SetUint(kFormatVnD, i, d[i % count]);2932}2933} else {2934int lane_count = LaneCountFromFormat(vform);2935uint64_t value = src.Uint(vform, src_index);2936dst.ClearForWrite(vform);2937for (int i = 0; i < lane_count; ++i) {2938dst.SetUint(vform, i, value);2939}2940}2941return dst;2942}29432944LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,2945LogicVRegister dst,2946const LogicVRegister& src,2947int src_index) {2948// In SVE, a segment is a 128-bit portion of a vector, like a Q register,2949// whereas in NEON, the size of segment is equal to the size of register2950// itself.2951int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));2952VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));2953int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);29542955VIXL_ASSERT(src_index >= 0);2956VIXL_ASSERT(src_index < lanes_per_segment);29572958dst.ClearForWrite(vform);2959for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {2960uint64_t value = src.Uint(vform, j + src_index);2961for (int i = 0; i < lanes_per_segment; i++) {2962dst.SetUint(vform, j + i, value);2963}2964}2965return dst;2966}29672968LogicVRegister Simulator::dup_elements_to_segments(2969VectorFormat vform,2970LogicVRegister dst,2971const std::pair<int, int>& src_and_index) {2972return dup_elements_to_segments(vform,2973dst,2974ReadVRegister(src_and_index.first),2975src_and_index.second);2976}29772978LogicVRegister Simulator::dup_immediate(VectorFormat vform,2979LogicVRegister dst,2980uint64_t imm) {2981int lane_count = LaneCountFromFormat(vform);2982uint64_t value = imm & MaxUintFromFormat(vform);2983dst.ClearForWrite(vform);2984for (int i = 0; i < lane_count; ++i) {2985dst.SetUint(vform, i, value);2986}2987return 
dst;2988}298929902991LogicVRegister Simulator::ins_element(VectorFormat vform,2992LogicVRegister dst,2993int dst_index,2994const LogicVRegister& src,2995int src_index) {2996dst.SetUint(vform, dst_index, src.Uint(vform, src_index));2997return dst;2998}299930003001LogicVRegister Simulator::ins_immediate(VectorFormat vform,3002LogicVRegister dst,3003int dst_index,3004uint64_t imm) {3005uint64_t value = imm & MaxUintFromFormat(vform);3006dst.SetUint(vform, dst_index, value);3007return dst;3008}300930103011LogicVRegister Simulator::index(VectorFormat vform,3012LogicVRegister dst,3013uint64_t start,3014uint64_t step) {3015VIXL_ASSERT(IsSVEFormat(vform));3016uint64_t value = start;3017for (int i = 0; i < LaneCountFromFormat(vform); i++) {3018dst.SetUint(vform, i, value);3019value += step;3020}3021return dst;3022}302330243025LogicVRegister Simulator::insr(VectorFormat vform,3026LogicVRegister dst,3027uint64_t imm) {3028VIXL_ASSERT(IsSVEFormat(vform));3029for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {3030dst.SetUint(vform, i, dst.Uint(vform, i - 1));3031}3032dst.SetUint(vform, 0, imm);3033return dst;3034}303530363037LogicVRegister Simulator::mov(VectorFormat vform,3038LogicVRegister dst,3039const LogicVRegister& src) {3040dst.ClearForWrite(vform);3041for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {3042dst.SetUint(vform, lane, src.Uint(vform, lane));3043}3044return dst;3045}304630473048LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {3049// Avoid a copy if the registers already alias.3050if (dst.Aliases(src)) return dst;30513052for (int i = 0; i < dst.GetChunkCount(); i++) {3053dst.SetChunk(i, src.GetChunk(i));3054}3055return dst;3056}305730583059LogicVRegister Simulator::mov_merging(VectorFormat vform,3060LogicVRegister dst,3061const SimPRegister& pg,3062const LogicVRegister& src) {3063return sel(vform, dst, pg, src, dst);3064}30653066LogicVRegister Simulator::mov_zeroing(VectorFormat vform,3067LogicVRegister 
dst,3068const SimPRegister& pg,3069const LogicVRegister& src) {3070SimVRegister zero;3071dup_immediate(vform, zero, 0);3072return sel(vform, dst, pg, src, zero);3073}30743075LogicVRegister Simulator::mov_alternating(VectorFormat vform,3076LogicVRegister dst,3077const LogicVRegister& src,3078int start_at) {3079VIXL_ASSERT((start_at == 0) || (start_at == 1));3080for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {3081dst.SetUint(vform, i, src.Uint(vform, i));3082}3083return dst;3084}30853086LogicPRegister Simulator::mov_merging(LogicPRegister dst,3087const LogicPRegister& pg,3088const LogicPRegister& src) {3089return sel(dst, pg, src, dst);3090}30913092LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,3093const LogicPRegister& pg,3094const LogicPRegister& src) {3095SimPRegister all_false;3096return sel(dst, pg, src, pfalse(all_false));3097}30983099LogicVRegister Simulator::movi(VectorFormat vform,3100LogicVRegister dst,3101uint64_t imm) {3102int lane_count = LaneCountFromFormat(vform);3103dst.ClearForWrite(vform);3104for (int i = 0; i < lane_count; ++i) {3105dst.SetUint(vform, i, imm);3106}3107return dst;3108}310931103111LogicVRegister Simulator::mvni(VectorFormat vform,3112LogicVRegister dst,3113uint64_t imm) {3114int lane_count = LaneCountFromFormat(vform);3115dst.ClearForWrite(vform);3116for (int i = 0; i < lane_count; ++i) {3117dst.SetUint(vform, i, ~imm);3118}3119return dst;3120}312131223123LogicVRegister Simulator::orr(VectorFormat vform,3124LogicVRegister dst,3125const LogicVRegister& src,3126uint64_t imm) {3127uint64_t result[16];3128int lane_count = LaneCountFromFormat(vform);3129for (int i = 0; i < lane_count; ++i) {3130result[i] = src.Uint(vform, i) | imm;3131}3132dst.ClearForWrite(vform);3133for (int i = 0; i < lane_count; ++i) {3134dst.SetUint(vform, i, result[i]);3135}3136return dst;3137}313831393140LogicVRegister Simulator::uxtl(VectorFormat vform,3141LogicVRegister dst,3142const LogicVRegister& src,3143bool is_2) {3144VectorFormat 
vform_half = VectorFormatHalfWidth(vform);3145int lane_count = LaneCountFromFormat(vform);3146int src_offset = is_2 ? lane_count : 0;31473148dst.ClearForWrite(vform);3149for (int i = 0; i < lane_count; i++) {3150dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));3151}3152return dst;3153}315431553156LogicVRegister Simulator::sxtl(VectorFormat vform,3157LogicVRegister dst,3158const LogicVRegister& src,3159bool is_2) {3160VectorFormat vform_half = VectorFormatHalfWidth(vform);3161int lane_count = LaneCountFromFormat(vform);3162int src_offset = is_2 ? lane_count : 0;31633164dst.ClearForWrite(vform);3165for (int i = 0; i < LaneCountFromFormat(vform); i++) {3166dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));3167}3168return dst;3169}317031713172LogicVRegister Simulator::uxtl2(VectorFormat vform,3173LogicVRegister dst,3174const LogicVRegister& src) {3175return uxtl(vform, dst, src, /* is_2 = */ true);3176}317731783179LogicVRegister Simulator::sxtl2(VectorFormat vform,3180LogicVRegister dst,3181const LogicVRegister& src) {3182return sxtl(vform, dst, src, /* is_2 = */ true);3183}318431853186LogicVRegister Simulator::uxt(VectorFormat vform,3187LogicVRegister dst,3188const LogicVRegister& src,3189unsigned from_size_in_bits) {3190int lane_count = LaneCountFromFormat(vform);3191uint64_t mask = GetUintMask(from_size_in_bits);31923193dst.ClearForWrite(vform);3194for (int i = 0; i < lane_count; i++) {3195dst.SetInt(vform, i, src.Uint(vform, i) & mask);3196}3197return dst;3198}319932003201LogicVRegister Simulator::sxt(VectorFormat vform,3202LogicVRegister dst,3203const LogicVRegister& src,3204unsigned from_size_in_bits) {3205int lane_count = LaneCountFromFormat(vform);32063207dst.ClearForWrite(vform);3208for (int i = 0; i < lane_count; i++) {3209uint64_t value =3210ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));3211dst.SetInt(vform, i, value);3212}3213return dst;3214}321532163217LogicVRegister Simulator::shrn(VectorFormat 
vform,3218LogicVRegister dst,3219const LogicVRegister& src,3220int shift) {3221SimVRegister temp;3222VectorFormat vform_src = VectorFormatDoubleWidth(vform);3223VectorFormat vform_dst = vform;3224LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);3225return extractnarrow(vform_dst, dst, false, shifted_src, false);3226}322732283229LogicVRegister Simulator::shrn2(VectorFormat vform,3230LogicVRegister dst,3231const LogicVRegister& src,3232int shift) {3233SimVRegister temp;3234VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));3235VectorFormat vformdst = vform;3236LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);3237return extractnarrow(vformdst, dst, false, shifted_src, false);3238}323932403241LogicVRegister Simulator::rshrn(VectorFormat vform,3242LogicVRegister dst,3243const LogicVRegister& src,3244int shift) {3245SimVRegister temp;3246VectorFormat vformsrc = VectorFormatDoubleWidth(vform);3247VectorFormat vformdst = vform;3248LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);3249return extractnarrow(vformdst, dst, false, shifted_src, false);3250}325132523253LogicVRegister Simulator::rshrn2(VectorFormat vform,3254LogicVRegister dst,3255const LogicVRegister& src,3256int shift) {3257SimVRegister temp;3258VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));3259VectorFormat vformdst = vform;3260LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);3261return extractnarrow(vformdst, dst, false, shifted_src, false);3262}32633264LogicVRegister Simulator::Table(VectorFormat vform,3265LogicVRegister dst,3266const LogicVRegister& ind,3267bool zero_out_of_bounds,3268const LogicVRegister* tab1,3269const LogicVRegister* tab2,3270const LogicVRegister* tab3,3271const LogicVRegister* tab4) {3272VIXL_ASSERT(tab1 != NULL);3273int lane_count = LaneCountFromFormat(vform);3274VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));3275uint64_t 
table[kZRegMaxSizeInBytes * 2];3276uint64_t result[kZRegMaxSizeInBytes];32773278// For Neon, the table source registers are always 16B, and Neon allows only3279// 8B or 16B vform for the destination, so infer the table format from the3280// destination.3281VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;32823283uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);3284if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);3285if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);3286if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);32873288for (int i = 0; i < lane_count; i++) {3289uint64_t index = ind.Uint(vform, i);3290result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);3291if (index < tab_size) result[i] = table[index];3292}3293dst.SetUintArray(vform, result);3294return dst;3295}32963297LogicVRegister Simulator::tbl(VectorFormat vform,3298LogicVRegister dst,3299const LogicVRegister& tab,3300const LogicVRegister& ind) {3301return Table(vform, dst, ind, true, &tab);3302}330333043305LogicVRegister Simulator::tbl(VectorFormat vform,3306LogicVRegister dst,3307const LogicVRegister& tab,3308const LogicVRegister& tab2,3309const LogicVRegister& ind) {3310return Table(vform, dst, ind, true, &tab, &tab2);3311}331233133314LogicVRegister Simulator::tbl(VectorFormat vform,3315LogicVRegister dst,3316const LogicVRegister& tab,3317const LogicVRegister& tab2,3318const LogicVRegister& tab3,3319const LogicVRegister& ind) {3320return Table(vform, dst, ind, true, &tab, &tab2, &tab3);3321}332233233324LogicVRegister Simulator::tbl(VectorFormat vform,3325LogicVRegister dst,3326const LogicVRegister& tab,3327const LogicVRegister& tab2,3328const LogicVRegister& tab3,3329const LogicVRegister& tab4,3330const LogicVRegister& ind) {3331return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);3332}333333343335LogicVRegister Simulator::tbx(VectorFormat vform,3336LogicVRegister dst,3337const 
LogicVRegister& tab,3338const LogicVRegister& ind) {3339return Table(vform, dst, ind, false, &tab);3340}334133423343LogicVRegister Simulator::tbx(VectorFormat vform,3344LogicVRegister dst,3345const LogicVRegister& tab,3346const LogicVRegister& tab2,3347const LogicVRegister& ind) {3348return Table(vform, dst, ind, false, &tab, &tab2);3349}335033513352LogicVRegister Simulator::tbx(VectorFormat vform,3353LogicVRegister dst,3354const LogicVRegister& tab,3355const LogicVRegister& tab2,3356const LogicVRegister& tab3,3357const LogicVRegister& ind) {3358return Table(vform, dst, ind, false, &tab, &tab2, &tab3);3359}336033613362LogicVRegister Simulator::tbx(VectorFormat vform,3363LogicVRegister dst,3364const LogicVRegister& tab,3365const LogicVRegister& tab2,3366const LogicVRegister& tab3,3367const LogicVRegister& tab4,3368const LogicVRegister& ind) {3369return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);3370}337133723373LogicVRegister Simulator::uqshrn(VectorFormat vform,3374LogicVRegister dst,3375const LogicVRegister& src,3376int shift) {3377return shrn(vform, dst, src, shift).UnsignedSaturate(vform);3378}337933803381LogicVRegister Simulator::uqshrn2(VectorFormat vform,3382LogicVRegister dst,3383const LogicVRegister& src,3384int shift) {3385return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);3386}338733883389LogicVRegister Simulator::uqrshrn(VectorFormat vform,3390LogicVRegister dst,3391const LogicVRegister& src,3392int shift) {3393return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);3394}339533963397LogicVRegister Simulator::uqrshrn2(VectorFormat vform,3398LogicVRegister dst,3399const LogicVRegister& src,3400int shift) {3401return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);3402}340334043405LogicVRegister Simulator::sqshrn(VectorFormat vform,3406LogicVRegister dst,3407const LogicVRegister& src,3408int shift) {3409SimVRegister temp;3410VectorFormat vformsrc = VectorFormatDoubleWidth(vform);3411VectorFormat vformdst = 
vform;3412LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);3413return sqxtn(vformdst, dst, shifted_src);3414}341534163417LogicVRegister Simulator::sqshrn2(VectorFormat vform,3418LogicVRegister dst,3419const LogicVRegister& src,3420int shift) {3421SimVRegister temp;3422VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));3423VectorFormat vformdst = vform;3424LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);3425return sqxtn(vformdst, dst, shifted_src);3426}342734283429LogicVRegister Simulator::sqrshrn(VectorFormat vform,3430LogicVRegister dst,3431const LogicVRegister& src,3432int shift) {3433SimVRegister temp;3434VectorFormat vformsrc = VectorFormatDoubleWidth(vform);3435VectorFormat vformdst = vform;3436LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);3437return sqxtn(vformdst, dst, shifted_src);3438}343934403441LogicVRegister Simulator::sqrshrn2(VectorFormat vform,3442LogicVRegister dst,3443const LogicVRegister& src,3444int shift) {3445SimVRegister temp;3446VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));3447VectorFormat vformdst = vform;3448LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);3449return sqxtn(vformdst, dst, shifted_src);3450}345134523453LogicVRegister Simulator::sqshrun(VectorFormat vform,3454LogicVRegister dst,3455const LogicVRegister& src,3456int shift) {3457SimVRegister temp;3458VectorFormat vformsrc = VectorFormatDoubleWidth(vform);3459VectorFormat vformdst = vform;3460LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);3461return sqxtun(vformdst, dst, shifted_src);3462}346334643465LogicVRegister Simulator::sqshrun2(VectorFormat vform,3466LogicVRegister dst,3467const LogicVRegister& src,3468int shift) {3469SimVRegister temp;3470VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));3471VectorFormat vformdst = vform;3472LogicVRegister shifted_src = sshr(vformsrc, temp, src, 
shift);3473return sqxtun(vformdst, dst, shifted_src);3474}347534763477LogicVRegister Simulator::sqrshrun(VectorFormat vform,3478LogicVRegister dst,3479const LogicVRegister& src,3480int shift) {3481SimVRegister temp;3482VectorFormat vformsrc = VectorFormatDoubleWidth(vform);3483VectorFormat vformdst = vform;3484LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);3485return sqxtun(vformdst, dst, shifted_src);3486}348734883489LogicVRegister Simulator::sqrshrun2(VectorFormat vform,3490LogicVRegister dst,3491const LogicVRegister& src,3492int shift) {3493SimVRegister temp;3494VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));3495VectorFormat vformdst = vform;3496LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);3497return sqxtun(vformdst, dst, shifted_src);3498}349935003501LogicVRegister Simulator::uaddl(VectorFormat vform,3502LogicVRegister dst,3503const LogicVRegister& src1,3504const LogicVRegister& src2) {3505SimVRegister temp1, temp2;3506uxtl(vform, temp1, src1);3507uxtl(vform, temp2, src2);3508add(vform, dst, temp1, temp2);3509return dst;3510}351135123513LogicVRegister Simulator::uaddl2(VectorFormat vform,3514LogicVRegister dst,3515const LogicVRegister& src1,3516const LogicVRegister& src2) {3517SimVRegister temp1, temp2;3518uxtl2(vform, temp1, src1);3519uxtl2(vform, temp2, src2);3520add(vform, dst, temp1, temp2);3521return dst;3522}352335243525LogicVRegister Simulator::uaddw(VectorFormat vform,3526LogicVRegister dst,3527const LogicVRegister& src1,3528const LogicVRegister& src2) {3529SimVRegister temp;3530uxtl(vform, temp, src2);3531add(vform, dst, src1, temp);3532return dst;3533}353435353536LogicVRegister Simulator::uaddw2(VectorFormat vform,3537LogicVRegister dst,3538const LogicVRegister& src1,3539const LogicVRegister& src2) {3540SimVRegister temp;3541uxtl2(vform, temp, src2);3542add(vform, dst, src1, temp);3543return dst;3544}354535463547LogicVRegister Simulator::saddl(VectorFormat 
vform,3548LogicVRegister dst,3549const LogicVRegister& src1,3550const LogicVRegister& src2) {3551SimVRegister temp1, temp2;3552sxtl(vform, temp1, src1);3553sxtl(vform, temp2, src2);3554add(vform, dst, temp1, temp2);3555return dst;3556}355735583559LogicVRegister Simulator::saddl2(VectorFormat vform,3560LogicVRegister dst,3561const LogicVRegister& src1,3562const LogicVRegister& src2) {3563SimVRegister temp1, temp2;3564sxtl2(vform, temp1, src1);3565sxtl2(vform, temp2, src2);3566add(vform, dst, temp1, temp2);3567return dst;3568}356935703571LogicVRegister Simulator::saddw(VectorFormat vform,3572LogicVRegister dst,3573const LogicVRegister& src1,3574const LogicVRegister& src2) {3575SimVRegister temp;3576sxtl(vform, temp, src2);3577add(vform, dst, src1, temp);3578return dst;3579}358035813582LogicVRegister Simulator::saddw2(VectorFormat vform,3583LogicVRegister dst,3584const LogicVRegister& src1,3585const LogicVRegister& src2) {3586SimVRegister temp;3587sxtl2(vform, temp, src2);3588add(vform, dst, src1, temp);3589return dst;3590}359135923593LogicVRegister Simulator::usubl(VectorFormat vform,3594LogicVRegister dst,3595const LogicVRegister& src1,3596const LogicVRegister& src2) {3597SimVRegister temp1, temp2;3598uxtl(vform, temp1, src1);3599uxtl(vform, temp2, src2);3600sub(vform, dst, temp1, temp2);3601return dst;3602}360336043605LogicVRegister Simulator::usubl2(VectorFormat vform,3606LogicVRegister dst,3607const LogicVRegister& src1,3608const LogicVRegister& src2) {3609SimVRegister temp1, temp2;3610uxtl2(vform, temp1, src1);3611uxtl2(vform, temp2, src2);3612sub(vform, dst, temp1, temp2);3613return dst;3614}361536163617LogicVRegister Simulator::usubw(VectorFormat vform,3618LogicVRegister dst,3619const LogicVRegister& src1,3620const LogicVRegister& src2) {3621SimVRegister temp;3622uxtl(vform, temp, src2);3623sub(vform, dst, src1, temp);3624return dst;3625}362636273628LogicVRegister Simulator::usubw2(VectorFormat vform,3629LogicVRegister dst,3630const LogicVRegister& 
src1,3631const LogicVRegister& src2) {3632SimVRegister temp;3633uxtl2(vform, temp, src2);3634sub(vform, dst, src1, temp);3635return dst;3636}363736383639LogicVRegister Simulator::ssubl(VectorFormat vform,3640LogicVRegister dst,3641const LogicVRegister& src1,3642const LogicVRegister& src2) {3643SimVRegister temp1, temp2;3644sxtl(vform, temp1, src1);3645sxtl(vform, temp2, src2);3646sub(vform, dst, temp1, temp2);3647return dst;3648}364936503651LogicVRegister Simulator::ssubl2(VectorFormat vform,3652LogicVRegister dst,3653const LogicVRegister& src1,3654const LogicVRegister& src2) {3655SimVRegister temp1, temp2;3656sxtl2(vform, temp1, src1);3657sxtl2(vform, temp2, src2);3658sub(vform, dst, temp1, temp2);3659return dst;3660}366136623663LogicVRegister Simulator::ssubw(VectorFormat vform,3664LogicVRegister dst,3665const LogicVRegister& src1,3666const LogicVRegister& src2) {3667SimVRegister temp;3668sxtl(vform, temp, src2);3669sub(vform, dst, src1, temp);3670return dst;3671}367236733674LogicVRegister Simulator::ssubw2(VectorFormat vform,3675LogicVRegister dst,3676const LogicVRegister& src1,3677const LogicVRegister& src2) {3678SimVRegister temp;3679sxtl2(vform, temp, src2);3680sub(vform, dst, src1, temp);3681return dst;3682}368336843685LogicVRegister Simulator::uabal(VectorFormat vform,3686LogicVRegister dst,3687const LogicVRegister& src1,3688const LogicVRegister& src2) {3689SimVRegister temp1, temp2;3690uxtl(vform, temp1, src1);3691uxtl(vform, temp2, src2);3692uaba(vform, dst, temp1, temp2);3693return dst;3694}369536963697LogicVRegister Simulator::uabal2(VectorFormat vform,3698LogicVRegister dst,3699const LogicVRegister& src1,3700const LogicVRegister& src2) {3701SimVRegister temp1, temp2;3702uxtl2(vform, temp1, src1);3703uxtl2(vform, temp2, src2);3704uaba(vform, dst, temp1, temp2);3705return dst;3706}370737083709LogicVRegister Simulator::sabal(VectorFormat vform,3710LogicVRegister dst,3711const LogicVRegister& src1,3712const LogicVRegister& src2) {3713SimVRegister temp1, 
temp2;3714sxtl(vform, temp1, src1);3715sxtl(vform, temp2, src2);3716saba(vform, dst, temp1, temp2);3717return dst;3718}371937203721LogicVRegister Simulator::sabal2(VectorFormat vform,3722LogicVRegister dst,3723const LogicVRegister& src1,3724const LogicVRegister& src2) {3725SimVRegister temp1, temp2;3726sxtl2(vform, temp1, src1);3727sxtl2(vform, temp2, src2);3728saba(vform, dst, temp1, temp2);3729return dst;3730}373137323733LogicVRegister Simulator::uabdl(VectorFormat vform,3734LogicVRegister dst,3735const LogicVRegister& src1,3736const LogicVRegister& src2) {3737SimVRegister temp1, temp2;3738uxtl(vform, temp1, src1);3739uxtl(vform, temp2, src2);3740absdiff(vform, dst, temp1, temp2, false);3741return dst;3742}374337443745LogicVRegister Simulator::uabdl2(VectorFormat vform,3746LogicVRegister dst,3747const LogicVRegister& src1,3748const LogicVRegister& src2) {3749SimVRegister temp1, temp2;3750uxtl2(vform, temp1, src1);3751uxtl2(vform, temp2, src2);3752absdiff(vform, dst, temp1, temp2, false);3753return dst;3754}375537563757LogicVRegister Simulator::sabdl(VectorFormat vform,3758LogicVRegister dst,3759const LogicVRegister& src1,3760const LogicVRegister& src2) {3761SimVRegister temp1, temp2;3762sxtl(vform, temp1, src1);3763sxtl(vform, temp2, src2);3764absdiff(vform, dst, temp1, temp2, true);3765return dst;3766}376737683769LogicVRegister Simulator::sabdl2(VectorFormat vform,3770LogicVRegister dst,3771const LogicVRegister& src1,3772const LogicVRegister& src2) {3773SimVRegister temp1, temp2;3774sxtl2(vform, temp1, src1);3775sxtl2(vform, temp2, src2);3776absdiff(vform, dst, temp1, temp2, true);3777return dst;3778}377937803781LogicVRegister Simulator::umull(VectorFormat vform,3782LogicVRegister dst,3783const LogicVRegister& src1,3784const LogicVRegister& src2,3785bool is_2) {3786SimVRegister temp1, temp2;3787uxtl(vform, temp1, src1, is_2);3788uxtl(vform, temp2, src2, is_2);3789mul(vform, dst, temp1, temp2);3790return dst;3791}379237933794LogicVRegister 
Simulator::umull2(VectorFormat vform,3795LogicVRegister dst,3796const LogicVRegister& src1,3797const LogicVRegister& src2) {3798return umull(vform, dst, src1, src2, /* is_2 = */ true);3799}380038013802LogicVRegister Simulator::smull(VectorFormat vform,3803LogicVRegister dst,3804const LogicVRegister& src1,3805const LogicVRegister& src2,3806bool is_2) {3807SimVRegister temp1, temp2;3808sxtl(vform, temp1, src1, is_2);3809sxtl(vform, temp2, src2, is_2);3810mul(vform, dst, temp1, temp2);3811return dst;3812}381338143815LogicVRegister Simulator::smull2(VectorFormat vform,3816LogicVRegister dst,3817const LogicVRegister& src1,3818const LogicVRegister& src2) {3819return smull(vform, dst, src1, src2, /* is_2 = */ true);3820}382138223823LogicVRegister Simulator::umlsl(VectorFormat vform,3824LogicVRegister dst,3825const LogicVRegister& src1,3826const LogicVRegister& src2,3827bool is_2) {3828SimVRegister temp1, temp2;3829uxtl(vform, temp1, src1, is_2);3830uxtl(vform, temp2, src2, is_2);3831mls(vform, dst, dst, temp1, temp2);3832return dst;3833}383438353836LogicVRegister Simulator::umlsl2(VectorFormat vform,3837LogicVRegister dst,3838const LogicVRegister& src1,3839const LogicVRegister& src2) {3840return umlsl(vform, dst, src1, src2, /* is_2 = */ true);3841}384238433844LogicVRegister Simulator::smlsl(VectorFormat vform,3845LogicVRegister dst,3846const LogicVRegister& src1,3847const LogicVRegister& src2,3848bool is_2) {3849SimVRegister temp1, temp2;3850sxtl(vform, temp1, src1, is_2);3851sxtl(vform, temp2, src2, is_2);3852mls(vform, dst, dst, temp1, temp2);3853return dst;3854}385538563857LogicVRegister Simulator::smlsl2(VectorFormat vform,3858LogicVRegister dst,3859const LogicVRegister& src1,3860const LogicVRegister& src2) {3861return smlsl(vform, dst, src1, src2, /* is_2 = */ true);3862}386338643865LogicVRegister Simulator::umlal(VectorFormat vform,3866LogicVRegister dst,3867const LogicVRegister& src1,3868const LogicVRegister& src2,3869bool is_2) {3870SimVRegister temp1, 
temp2;3871uxtl(vform, temp1, src1, is_2);3872uxtl(vform, temp2, src2, is_2);3873mla(vform, dst, dst, temp1, temp2);3874return dst;3875}387638773878LogicVRegister Simulator::umlal2(VectorFormat vform,3879LogicVRegister dst,3880const LogicVRegister& src1,3881const LogicVRegister& src2) {3882return umlal(vform, dst, src1, src2, /* is_2 = */ true);3883}388438853886LogicVRegister Simulator::smlal(VectorFormat vform,3887LogicVRegister dst,3888const LogicVRegister& src1,3889const LogicVRegister& src2,3890bool is_2) {3891SimVRegister temp1, temp2;3892sxtl(vform, temp1, src1, is_2);3893sxtl(vform, temp2, src2, is_2);3894mla(vform, dst, dst, temp1, temp2);3895return dst;3896}389738983899LogicVRegister Simulator::smlal2(VectorFormat vform,3900LogicVRegister dst,3901const LogicVRegister& src1,3902const LogicVRegister& src2) {3903return smlal(vform, dst, src1, src2, /* is_2 = */ true);3904}390539063907LogicVRegister Simulator::sqdmlal(VectorFormat vform,3908LogicVRegister dst,3909const LogicVRegister& src1,3910const LogicVRegister& src2,3911bool is_2) {3912SimVRegister temp;3913LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);3914return add(vform, dst, dst, product).SignedSaturate(vform);3915}391639173918LogicVRegister Simulator::sqdmlal2(VectorFormat vform,3919LogicVRegister dst,3920const LogicVRegister& src1,3921const LogicVRegister& src2) {3922return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);3923}392439253926LogicVRegister Simulator::sqdmlsl(VectorFormat vform,3927LogicVRegister dst,3928const LogicVRegister& src1,3929const LogicVRegister& src2,3930bool is_2) {3931SimVRegister temp;3932LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);3933return sub(vform, dst, dst, product).SignedSaturate(vform);3934}393539363937LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,3938LogicVRegister dst,3939const LogicVRegister& src1,3940const LogicVRegister& src2) {3941return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ 
true);3942}394339443945LogicVRegister Simulator::sqdmull(VectorFormat vform,3946LogicVRegister dst,3947const LogicVRegister& src1,3948const LogicVRegister& src2,3949bool is_2) {3950SimVRegister temp;3951LogicVRegister product = smull(vform, temp, src1, src2, is_2);3952return add(vform, dst, product, product).SignedSaturate(vform);3953}395439553956LogicVRegister Simulator::sqdmull2(VectorFormat vform,3957LogicVRegister dst,3958const LogicVRegister& src1,3959const LogicVRegister& src2) {3960return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);3961}39623963LogicVRegister Simulator::sqrdmulh(VectorFormat vform,3964LogicVRegister dst,3965const LogicVRegister& src1,3966const LogicVRegister& src2,3967bool round) {3968int esize = LaneSizeInBitsFromFormat(vform);39693970SimVRegister temp_lo, temp_hi;39713972// Compute low and high multiplication results.3973mul(vform, temp_lo, src1, src2);3974smulh(vform, temp_hi, src1, src2);39753976// Double by shifting high half, and adding in most-significant bit of low3977// half.3978shl(vform, temp_hi, temp_hi, 1);3979usra(vform, temp_hi, temp_lo, esize - 1);39803981if (round) {3982// Add the second (due to doubling) most-significant bit of the low half3983// into the result.3984shl(vform, temp_lo, temp_lo, 1);3985usra(vform, temp_hi, temp_lo, esize - 1);3986}39873988SimPRegister not_sat;3989LogicPRegister ptemp(not_sat);3990dst.ClearForWrite(vform);3991for (int i = 0; i < LaneCountFromFormat(vform); i++) {3992// Saturation only occurs when src1 = src2 = minimum representable value.3993// Check this as a special case.3994ptemp.SetActive(vform, i, true);3995if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&3996(src2.Int(vform, i) == MinIntFromFormat(vform))) {3997ptemp.SetActive(vform, i, false);3998}3999dst.SetInt(vform, i, MaxIntFromFormat(vform));4000}40014002mov_merging(vform, dst, not_sat, temp_hi);4003return dst;4004}400540064007LogicVRegister Simulator::dot(VectorFormat vform,4008LogicVRegister dst,4009const 
LogicVRegister& src1,4010const LogicVRegister& src2,4011bool is_src1_signed,4012bool is_src2_signed) {4013VectorFormat quarter_vform =4014VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));40154016dst.ClearForWrite(vform);4017for (int e = 0; e < LaneCountFromFormat(vform); e++) {4018uint64_t result = 0;4019int64_t element1, element2;4020for (int i = 0; i < 4; i++) {4021int index = 4 * e + i;4022if (is_src1_signed) {4023element1 = src1.Int(quarter_vform, index);4024} else {4025element1 = src1.Uint(quarter_vform, index);4026}4027if (is_src2_signed) {4028element2 = src2.Int(quarter_vform, index);4029} else {4030element2 = src2.Uint(quarter_vform, index);4031}4032result += element1 * element2;4033}4034dst.SetUint(vform, e, result + dst.Uint(vform, e));4035}4036return dst;4037}403840394040LogicVRegister Simulator::sdot(VectorFormat vform,4041LogicVRegister dst,4042const LogicVRegister& src1,4043const LogicVRegister& src2) {4044return dot(vform, dst, src1, src2, true, true);4045}404640474048LogicVRegister Simulator::udot(VectorFormat vform,4049LogicVRegister dst,4050const LogicVRegister& src1,4051const LogicVRegister& src2) {4052return dot(vform, dst, src1, src2, false, false);4053}40544055LogicVRegister Simulator::usdot(VectorFormat vform,4056LogicVRegister dst,4057const LogicVRegister& src1,4058const LogicVRegister& src2) {4059return dot(vform, dst, src1, src2, false, true);4060}40614062LogicVRegister Simulator::cdot(VectorFormat vform,4063LogicVRegister dst,4064const LogicVRegister& acc,4065const LogicVRegister& src1,4066const LogicVRegister& src2,4067int rot) {4068VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));4069VectorFormat quarter_vform =4070VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));40714072int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;4073int sel_b = 1 - sel_a;4074int sub_i = ((rot == 90) || (rot == 180)) ? 
1 : -1;40754076for (int i = 0; i < LaneCountFromFormat(vform); i++) {4077int64_t result = acc.Int(vform, i);4078for (int j = 0; j < 2; j++) {4079int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);4080int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);4081int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);4082int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);4083result += (r1 * r2) + (sub_i * i1 * i2);4084}4085dst.SetInt(vform, i, result);4086}4087return dst;4088}40894090LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,4091LogicVRegister dst,4092const LogicVRegister& srca,4093const LogicVRegister& src1,4094const LogicVRegister& src2,4095int rot) {4096SimVRegister src1_a, src1_b;4097SimVRegister src2_a, src2_b;4098SimVRegister srca_i, srca_r;4099SimVRegister zero, temp;4100zero.Clear();41014102if ((rot == 0) || (rot == 180)) {4103uzp1(vform, src1_a, src1, zero);4104uzp1(vform, src2_a, src2, zero);4105uzp2(vform, src2_b, src2, zero);4106} else {4107uzp2(vform, src1_a, src1, zero);4108uzp2(vform, src2_a, src2, zero);4109uzp1(vform, src2_b, src2, zero);4110}41114112uzp1(vform, srca_r, srca, zero);4113uzp2(vform, srca_i, srca, zero);41144115bool sub_r = (rot == 90) || (rot == 180);4116bool sub_i = (rot == 180) || (rot == 270);41174118const bool round = true;4119sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);4120sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);4121zip1(vform, dst, srca_r, srca_i);4122return dst;4123}41244125LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,4126LogicVRegister dst,4127const LogicVRegister& srca,4128const LogicVRegister& src1,4129const LogicVRegister& src2,4130int index,4131int rot) {4132SimVRegister temp;4133dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);4134return sqrdcmlah(vform, dst, srca, src1, temp, rot);4135}41364137LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,4138LogicVRegister dst,4139const LogicVRegister& src1,4140const 
LogicVRegister& src2,4141bool round,4142bool sub_op) {4143// 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.4144// To avoid this, we use:4145// (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)4146// which is same as:4147// (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.41484149VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);4150int esize = kDRegSize;4151vixl_uint128_t round_const, accum;4152round_const.first = 0;4153if (round) {4154round_const.second = UINT64_C(1) << (esize - 2);4155} else {4156round_const.second = 0;4157}41584159dst.ClearForWrite(vform);4160for (int i = 0; i < LaneCountFromFormat(vform); i++) {4161// Shift the whole value left by `esize - 1` bits.4162accum.first = dst.Int(vform, i) >> 1;4163accum.second = dst.Int(vform, i) << (esize - 1);41644165vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));41664167if (sub_op) {4168product = Neg128(product);4169}4170accum = Add128(accum, product);41714172// Perform rounding.4173accum = Add128(accum, round_const);41744175// Arithmetic shift the whole value right by `esize - 1` bits.4176accum.second = (accum.first << 1) | (accum.second >> (esize - 1));4177accum.first = -(accum.first >> (esize - 1));41784179// Perform saturation.4180bool is_pos = (accum.first == 0) ? 
true : false;4181if (is_pos &&4182(accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {4183accum.second = MaxIntFromFormat(vform);4184} else if (!is_pos && (accum.second <4185static_cast<uint64_t>(MinIntFromFormat(vform)))) {4186accum.second = MinIntFromFormat(vform);4187}41884189dst.SetInt(vform, i, accum.second);4190}41914192return dst;4193}41944195LogicVRegister Simulator::sqrdmlash(VectorFormat vform,4196LogicVRegister dst,4197const LogicVRegister& src1,4198const LogicVRegister& src2,4199bool round,4200bool sub_op) {4201// 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.4202// To avoid this, we use:4203// (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)4204// which is same as:4205// (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.42064207if (vform == kFormatVnD) {4208return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);4209}42104211int esize = LaneSizeInBitsFromFormat(vform);4212int round_const = round ? (1 << (esize - 2)) : 0;4213int64_t accum;42144215dst.ClearForWrite(vform);4216for (int i = 0; i < LaneCountFromFormat(vform); i++) {4217accum = dst.Int(vform, i) << (esize - 1);4218if (sub_op) {4219accum -= src1.Int(vform, i) * src2.Int(vform, i);4220} else {4221accum += src1.Int(vform, i) * src2.Int(vform, i);4222}4223accum += round_const;4224accum = accum >> (esize - 1);42254226if (accum > MaxIntFromFormat(vform)) {4227accum = MaxIntFromFormat(vform);4228} else if (accum < MinIntFromFormat(vform)) {4229accum = MinIntFromFormat(vform);4230}4231dst.SetInt(vform, i, accum);4232}4233return dst;4234}423542364237LogicVRegister Simulator::sqrdmlah(VectorFormat vform,4238LogicVRegister dst,4239const LogicVRegister& src1,4240const LogicVRegister& src2,4241bool round) {4242return sqrdmlash(vform, dst, src1, src2, round, false);4243}424442454246LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,4247LogicVRegister dst,4248const LogicVRegister& src1,4249const LogicVRegister& src2,4250bool round) {4251return 
sqrdmlash(vform, dst, src1, src2, round, true);4252}425342544255LogicVRegister Simulator::sqdmulh(VectorFormat vform,4256LogicVRegister dst,4257const LogicVRegister& src1,4258const LogicVRegister& src2) {4259return sqrdmulh(vform, dst, src1, src2, false);4260}426142624263LogicVRegister Simulator::addhn(VectorFormat vform,4264LogicVRegister dst,4265const LogicVRegister& src1,4266const LogicVRegister& src2) {4267SimVRegister temp;4268add(VectorFormatDoubleWidth(vform), temp, src1, src2);4269shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));4270return dst;4271}427242734274LogicVRegister Simulator::addhn2(VectorFormat vform,4275LogicVRegister dst,4276const LogicVRegister& src1,4277const LogicVRegister& src2) {4278SimVRegister temp;4279add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);4280shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));4281return dst;4282}428342844285LogicVRegister Simulator::raddhn(VectorFormat vform,4286LogicVRegister dst,4287const LogicVRegister& src1,4288const LogicVRegister& src2) {4289SimVRegister temp;4290add(VectorFormatDoubleWidth(vform), temp, src1, src2);4291rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));4292return dst;4293}429442954296LogicVRegister Simulator::raddhn2(VectorFormat vform,4297LogicVRegister dst,4298const LogicVRegister& src1,4299const LogicVRegister& src2) {4300SimVRegister temp;4301add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);4302rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));4303return dst;4304}430543064307LogicVRegister Simulator::subhn(VectorFormat vform,4308LogicVRegister dst,4309const LogicVRegister& src1,4310const LogicVRegister& src2) {4311SimVRegister temp;4312sub(VectorFormatDoubleWidth(vform), temp, src1, src2);4313shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));4314return dst;4315}431643174318LogicVRegister Simulator::subhn2(VectorFormat vform,4319LogicVRegister dst,4320const LogicVRegister& src1,4321const 
LogicVRegister& src2) {
  SimVRegister temp;
  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// rsubhn: subtracts in the double-width format, then narrows with rshrn().
LogicVRegister Simulator::rsubhn(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide_diff;
  sub(VectorFormatDoubleWidth(vform), wide_diff, src1, src2);
  rshrn(vform, dst, wide_diff, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// rsubhn2: subtracts in the double-width form of the half-lane-count format,
// then narrows with rshrn2().
LogicVRegister Simulator::rsubhn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister wide_diff;
  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)),
      wide_diff,
      src1,
      src2);
  rshrn2(vform, dst, wide_diff, LaneSizeInBitsFromFormat(vform));
  return dst;
}


// trn1: output lanes 2k and 2k+1 take lane 2k of `src1` and `src2`
// respectively. Results are staged in a local array so `dst` may alias a
// source.
LogicVRegister Simulator::trn1(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int paired_lanes = 2 * (lane_count / 2);
  for (int even = 0; even < paired_lanes; even += 2) {
    staged[even] = src1.Uint(vform, even);
    staged[even + 1] = src2.Uint(vform, even);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}


// trn2: output lanes 2k and 2k+1 take lane 2k+1 of `src1` and `src2`
// respectively. Results are staged in a local array so `dst` may alias a
// source.
LogicVRegister Simulator::trn2(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t staged[kZRegMaxSizeInBytes] = {};
  const int lane_count = LaneCountFromFormat(vform);
  const int paired_lanes = 2 * (lane_count / 2);
  for (int even = 0; even < paired_lanes; even += 2) {
    staged[even] = src1.Uint(vform, even + 1);
    staged[even + 1] = src2.Uint(vform, even + 1);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}


LogicVRegister
Simulator::zip1(VectorFormat vform,4392LogicVRegister dst,4393const LogicVRegister& src1,4394const LogicVRegister& src2) {4395uint64_t result[kZRegMaxSizeInBytes] = {};4396int lane_count = LaneCountFromFormat(vform);4397int pairs = lane_count / 2;4398for (int i = 0; i < pairs; ++i) {4399result[2 * i] = src1.Uint(vform, i);4400result[(2 * i) + 1] = src2.Uint(vform, i);4401}44024403dst.ClearForWrite(vform);4404for (int i = 0; i < lane_count; ++i) {4405dst.SetUint(vform, i, result[i]);4406}4407return dst;4408}440944104411LogicVRegister Simulator::zip2(VectorFormat vform,4412LogicVRegister dst,4413const LogicVRegister& src1,4414const LogicVRegister& src2) {4415uint64_t result[kZRegMaxSizeInBytes] = {};4416int lane_count = LaneCountFromFormat(vform);4417int pairs = lane_count / 2;4418for (int i = 0; i < pairs; ++i) {4419result[2 * i] = src1.Uint(vform, pairs + i);4420result[(2 * i) + 1] = src2.Uint(vform, pairs + i);4421}44224423dst.ClearForWrite(vform);4424for (int i = 0; i < lane_count; ++i) {4425dst.SetUint(vform, i, result[i]);4426}4427return dst;4428}442944304431LogicVRegister Simulator::uzp1(VectorFormat vform,4432LogicVRegister dst,4433const LogicVRegister& src1,4434const LogicVRegister& src2) {4435uint64_t result[kZRegMaxSizeInBytes * 2];4436int lane_count = LaneCountFromFormat(vform);4437for (int i = 0; i < lane_count; ++i) {4438result[i] = src1.Uint(vform, i);4439result[lane_count + i] = src2.Uint(vform, i);4440}44414442dst.ClearForWrite(vform);4443for (int i = 0; i < lane_count; ++i) {4444dst.SetUint(vform, i, result[2 * i]);4445}4446return dst;4447}444844494450LogicVRegister Simulator::uzp2(VectorFormat vform,4451LogicVRegister dst,4452const LogicVRegister& src1,4453const LogicVRegister& src2) {4454uint64_t result[kZRegMaxSizeInBytes * 2];4455int lane_count = LaneCountFromFormat(vform);4456for (int i = 0; i < lane_count; ++i) {4457result[i] = src1.Uint(vform, i);4458result[lane_count + i] = src2.Uint(vform, i);4459}44604461dst.ClearForWrite(vform);4462for 
(int i = 0; i < lane_count; ++i) {4463dst.SetUint(vform, i, result[(2 * i) + 1]);4464}4465return dst;4466}44674468LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,4469LogicVRegister dst,4470const LogicVRegister& src) {4471// Interleave the top and bottom half of a vector, ie. for a vector:4472//4473// [ ... | F | D | B | ... | E | C | A ]4474//4475// where B is the first element in the top half of the vector, produce a4476// result vector:4477//4478// [ ... | ... | F | E | D | C | B | A ]44794480uint64_t result[kZRegMaxSizeInBytes] = {};4481int lane_count = LaneCountFromFormat(vform);4482for (int i = 0; i < lane_count; i += 2) {4483result[i] = src.Uint(vform, i / 2);4484result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));4485}4486dst.SetUintArray(vform, result);4487return dst;4488}44894490template <typename T>4491T Simulator::FPNeg(T op) {4492return -op;4493}44944495template <typename T>4496T Simulator::FPAdd(T op1, T op2) {4497T result = FPProcessNaNs(op1, op2);4498if (IsNaN(result)) {4499return result;4500}45014502if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {4503// inf + -inf returns the default NaN.4504FPProcessException();4505return FPDefaultNaN<T>();4506} else {4507// Other cases should be handled by standard arithmetic.4508return op1 + op2;4509}4510}451145124513template <typename T>4514T Simulator::FPSub(T op1, T op2) {4515// NaNs should be handled elsewhere.4516VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));45174518if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {4519// inf - inf returns the default NaN.4520FPProcessException();4521return FPDefaultNaN<T>();4522} else {4523// Other cases should be handled by standard arithmetic.4524return op1 - op2;4525}4526}45274528template <typename T>4529T Simulator::FPMulNaNs(T op1, T op2) {4530T result = FPProcessNaNs(op1, op2);4531return IsNaN(result) ? 
result : FPMul(op1, op2);4532}45334534template <typename T>4535T Simulator::FPMul(T op1, T op2) {4536// NaNs should be handled elsewhere.4537VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));45384539if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {4540// inf * 0.0 returns the default NaN.4541FPProcessException();4542return FPDefaultNaN<T>();4543} else {4544// Other cases should be handled by standard arithmetic.4545return op1 * op2;4546}4547}454845494550template <typename T>4551T Simulator::FPMulx(T op1, T op2) {4552if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {4553// inf * 0.0 returns +/-2.0.4554T two = 2.0;4555return copysign(1.0, op1) * copysign(1.0, op2) * two;4556}4557return FPMul(op1, op2);4558}455945604561template <typename T>4562T Simulator::FPMulAdd(T a, T op1, T op2) {4563T result = FPProcessNaNs3(a, op1, op2);45644565T sign_a = copysign(1.0, a);4566T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);4567bool isinf_prod = IsInf(op1) || IsInf(op2);4568bool operation_generates_nan =4569(IsInf(op1) && (op2 == 0.0)) || // inf * 0.04570(IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf4571(IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf45724573if (IsNaN(result)) {4574// Generated NaNs override quiet NaNs propagated from a.4575if (operation_generates_nan && IsQuietNaN(a)) {4576FPProcessException();4577return FPDefaultNaN<T>();4578} else {4579return result;4580}4581}45824583// If the operation would produce a NaN, return the default NaN.4584if (operation_generates_nan) {4585FPProcessException();4586return FPDefaultNaN<T>();4587}45884589// Work around broken fma implementations for exact zero results: The sign of4590// exact 0.0 results is positive unless both a and op1 * op2 are negative.4591if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {4592return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? 
-0.0 : 0.0;4593}45944595result = FusedMultiplyAdd(op1, op2, a);4596VIXL_ASSERT(!IsNaN(result));45974598// Work around broken fma implementations for rounded zero results: If a is4599// 0.0, the sign of the result is the sign of op1 * op2 before rounding.4600if ((a == 0.0) && (result == 0.0)) {4601return copysign(0.0, sign_prod);4602}46034604return result;4605}460646074608template <typename T>4609T Simulator::FPDiv(T op1, T op2) {4610// NaNs should be handled elsewhere.4611VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));46124613if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {4614// inf / inf and 0.0 / 0.0 return the default NaN.4615FPProcessException();4616return FPDefaultNaN<T>();4617} else {4618if (op2 == 0.0) {4619FPProcessException();4620if (!IsNaN(op1)) {4621double op1_sign = copysign(1.0, op1);4622double op2_sign = copysign(1.0, op2);4623return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);4624}4625}46264627// Other cases should be handled by standard arithmetic.4628return op1 / op2;4629}4630}463146324633template <typename T>4634T Simulator::FPSqrt(T op) {4635if (IsNaN(op)) {4636return FPProcessNaN(op);4637} else if (op < T(0.0)) {4638FPProcessException();4639return FPDefaultNaN<T>();4640} else {4641return sqrt(op);4642}4643}464446454646template <typename T>4647T Simulator::FPMax(T a, T b) {4648T result = FPProcessNaNs(a, b);4649if (IsNaN(result)) return result;46504651if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {4652// a and b are zero, and the sign differs: return +0.0.4653return 0.0;4654} else {4655return (a > b) ? a : b;4656}4657}465846594660template <typename T>4661T Simulator::FPMaxNM(T a, T b) {4662if (IsQuietNaN(a) && !IsQuietNaN(b)) {4663a = kFP64NegativeInfinity;4664} else if (!IsQuietNaN(a) && IsQuietNaN(b)) {4665b = kFP64NegativeInfinity;4666}46674668T result = FPProcessNaNs(a, b);4669return IsNaN(result) ? 
result : FPMax(a, b);4670}467146724673template <typename T>4674T Simulator::FPMin(T a, T b) {4675T result = FPProcessNaNs(a, b);4676if (IsNaN(result)) return result;46774678if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {4679// a and b are zero, and the sign differs: return -0.0.4680return -0.0;4681} else {4682return (a < b) ? a : b;4683}4684}468546864687template <typename T>4688T Simulator::FPMinNM(T a, T b) {4689if (IsQuietNaN(a) && !IsQuietNaN(b)) {4690a = kFP64PositiveInfinity;4691} else if (!IsQuietNaN(a) && IsQuietNaN(b)) {4692b = kFP64PositiveInfinity;4693}46944695T result = FPProcessNaNs(a, b);4696return IsNaN(result) ? result : FPMin(a, b);4697}469846994700template <typename T>4701T Simulator::FPRecipStepFused(T op1, T op2) {4702const T two = 2.0;4703if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {4704return two;4705} else if (IsInf(op1) || IsInf(op2)) {4706// Return +inf if signs match, otherwise -inf.4707return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity4708: kFP64NegativeInfinity;4709} else {4710return FusedMultiplyAdd(op1, op2, two);4711}4712}47134714template <typename T>4715bool IsNormal(T value) {4716return std::isnormal(value);4717}47184719template <>4720bool IsNormal(SimFloat16 value) {4721uint16_t rawbits = Float16ToRawbits(value);4722uint16_t exp_mask = 0x7c00;4723// Check that the exponent is neither all zeroes or all ones.4724return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);4725}472647274728template <typename T>4729T Simulator::FPRSqrtStepFused(T op1, T op2) {4730const T one_point_five = 1.5;4731const T two = 2.0;47324733if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {4734return one_point_five;4735} else if (IsInf(op1) || IsInf(op2)) {4736// Return +inf if signs match, otherwise -inf.4737return ((op1 >= 0.0) == (op2 >= 0.0)) ? 
kFP64PositiveInfinity4738: kFP64NegativeInfinity;4739} else {4740// The multiply-add-halve operation must be fully fused, so avoid interim4741// rounding by checking which operand can be losslessly divided by two4742// before doing the multiply-add.4743if (IsNormal(op1 / two)) {4744return FusedMultiplyAdd(op1 / two, op2, one_point_five);4745} else if (IsNormal(op2 / two)) {4746return FusedMultiplyAdd(op1, op2 / two, one_point_five);4747} else {4748// Neither operand is normal after halving: the result is dominated by4749// the addition term, so just return that.4750return one_point_five;4751}4752}4753}47544755int32_t Simulator::FPToFixedJS(double value) {4756// The Z-flag is set when the conversion from double precision floating-point4757// to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,4758// outside the bounds of a 32-bit integer, or isn't an exact integer then the4759// Z-flag is unset.4760int Z = 1;4761int32_t result;47624763if ((value == 0.0) || (value == kFP64PositiveInfinity) ||4764(value == kFP64NegativeInfinity)) {4765// +/- zero and infinity all return zero, however -0 and +/- Infinity also4766// unset the Z-flag.4767result = 0.0;4768if ((value != 0.0) || std::signbit(value)) {4769Z = 0;4770}4771} else if (std::isnan(value)) {4772// NaN values unset the Z-flag and set the result to 0.4773FPProcessNaN(value);4774result = 0;4775Z = 0;4776} else {4777// All other values are converted to an integer representation, rounded4778// toward zero.4779double int_result = std::floor(value);4780double error = value - int_result;47814782if ((error != 0.0) && (int_result < 0.0)) {4783int_result++;4784}47854786// Constrain the value into the range [INT32_MIN, INT32_MAX]. 
We can almost4787// write a one-liner with std::round, but the behaviour on ties is incorrect4788// for our purposes.4789double mod_const = static_cast<double>(UINT64_C(1) << 32);4790double mod_error =4791(int_result / mod_const) - std::floor(int_result / mod_const);4792double constrained;4793if (mod_error == 0.5) {4794constrained = INT32_MIN;4795} else {4796constrained = int_result - mod_const * round(int_result / mod_const);4797}47984799VIXL_ASSERT(std::floor(constrained) == constrained);4800VIXL_ASSERT(constrained >= INT32_MIN);4801VIXL_ASSERT(constrained <= INT32_MAX);48024803// Take the bottom 32 bits of the result as a 32-bit integer.4804result = static_cast<int32_t>(constrained);48054806if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||4807(error != 0.0)) {4808// If the integer result is out of range or the conversion isn't exact,4809// take exception and unset the Z-flag.4810FPProcessException();4811Z = 0;4812}4813}48144815ReadNzcv().SetN(0);4816ReadNzcv().SetZ(Z);4817ReadNzcv().SetC(0);4818ReadNzcv().SetV(0);48194820return result;4821}48224823double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {4824VIXL_ASSERT((value != kFP64PositiveInfinity) &&4825(value != kFP64NegativeInfinity));4826VIXL_ASSERT(!IsNaN(value));48274828double int_result = std::floor(value);4829double error = value - int_result;4830switch (round_mode) {4831case FPTieAway: {4832// Take care of correctly handling the range ]-0.5, -0.0], which must4833// yield -0.0.4834if ((-0.5 < value) && (value < 0.0)) {4835int_result = -0.0;48364837} else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {4838// If the error is greater than 0.5, or is equal to 0.5 and the integer4839// result is positive, round up.4840int_result++;4841}4842break;4843}4844case FPTieEven: {4845// Take care of correctly handling the range [-0.5, -0.0], which must4846// yield -0.0.4847if ((-0.5 <= value) && (value < 0.0)) {4848int_result = -0.0;48494850// If the error is greater than 
0.5, or is equal to 0.5 and the integer4851// result is odd, round up.4852} else if ((error > 0.5) ||4853((error == 0.5) && (std::fmod(int_result, 2) != 0))) {4854int_result++;4855}4856break;4857}4858case FPZero: {4859// If value>0 then we take floor(value)4860// otherwise, ceil(value).4861if (value < 0) {4862int_result = ceil(value);4863}4864break;4865}4866case FPNegativeInfinity: {4867// We always use floor(value).4868break;4869}4870case FPPositiveInfinity: {4871// Take care of correctly handling the range ]-1.0, -0.0], which must4872// yield -0.0.4873if ((-1.0 < value) && (value < 0.0)) {4874int_result = -0.0;48754876// If the error is non-zero, round up.4877} else if (error > 0.0) {4878int_result++;4879}4880break;4881}4882default:4883VIXL_UNIMPLEMENTED();4884}4885return int_result;4886}48874888double Simulator::FPRoundInt(double value, FPRounding round_mode) {4889if ((value == 0.0) || (value == kFP64PositiveInfinity) ||4890(value == kFP64NegativeInfinity)) {4891return value;4892} else if (IsNaN(value)) {4893return FPProcessNaN(value);4894}4895return FPRoundIntCommon(value, round_mode);4896}48974898double Simulator::FPRoundInt(double value,4899FPRounding round_mode,4900FrintMode frint_mode) {4901if (frint_mode == kFrintToInteger) {4902return FPRoundInt(value, round_mode);4903}49044905VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));49064907if (value == 0.0) {4908return value;4909}49104911if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||4912IsNaN(value)) {4913if (frint_mode == kFrintToInt32) {4914return INT32_MIN;4915} else {4916return INT64_MIN;4917}4918}49194920double result = FPRoundIntCommon(value, round_mode);49214922// We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly4923// representable as a double, and is rounded to (INT64_MAX + 1) when4924// converted. 
To avoid this, we compare `result >= int64_max_plus_one`4925// instead; this is safe because `result` is known to be integral, and4926// `int64_max_plus_one` is exactly representable as a double.4927constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;4928VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(4929int64_max_plus_one)) == int64_max_plus_one);49304931if (frint_mode == kFrintToInt32) {4932if ((result > INT32_MAX) || (result < INT32_MIN)) {4933return INT32_MIN;4934}4935} else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {4936return INT64_MIN;4937}49384939return result;4940}49414942int16_t Simulator::FPToInt16(double value, FPRounding rmode) {4943value = FPRoundInt(value, rmode);4944if (value >= kHMaxInt) {4945return kHMaxInt;4946} else if (value < kHMinInt) {4947return kHMinInt;4948}4949return IsNaN(value) ? 0 : static_cast<int16_t>(value);4950}495149524953int32_t Simulator::FPToInt32(double value, FPRounding rmode) {4954value = FPRoundInt(value, rmode);4955if (value >= kWMaxInt) {4956return kWMaxInt;4957} else if (value < kWMinInt) {4958return kWMinInt;4959}4960return IsNaN(value) ? 0 : static_cast<int32_t>(value);4961}496249634964int64_t Simulator::FPToInt64(double value, FPRounding rmode) {4965value = FPRoundInt(value, rmode);4966// This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues4967// as a result of kMaxInt not being representable as a double.4968if (value >= 9223372036854775808.) {4969return kXMaxInt;4970} else if (value < kXMinInt) {4971return kXMinInt;4972}4973return IsNaN(value) ? 0 : static_cast<int64_t>(value);4974}497549764977uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {4978value = FPRoundInt(value, rmode);4979if (value >= kHMaxUInt) {4980return kHMaxUInt;4981} else if (value < 0.0) {4982return 0;4983}4984return IsNaN(value) ? 
0 : static_cast<uint16_t>(value);4985}498649874988uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {4989value = FPRoundInt(value, rmode);4990if (value >= kWMaxUInt) {4991return kWMaxUInt;4992} else if (value < 0.0) {4993return 0;4994}4995return IsNaN(value) ? 0 : static_cast<uint32_t>(value);4996}499749984999uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {5000value = FPRoundInt(value, rmode);5001// This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues5002// as a result of kMaxUInt not being representable as a double.5003if (value >= 18446744073709551616.) {5004return kXMaxUInt;5005} else if (value < 0.0) {5006return 0;5007}5008return IsNaN(value) ? 0 : static_cast<uint64_t>(value);5009}501050115012#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \5013template <typename T> \5014LogicVRegister Simulator::FN(VectorFormat vform, \5015LogicVRegister dst, \5016const LogicVRegister& src1, \5017const LogicVRegister& src2) { \5018dst.ClearForWrite(vform); \5019for (int i = 0; i < LaneCountFromFormat(vform); i++) { \5020T op1 = src1.Float<T>(i); \5021T op2 = src2.Float<T>(i); \5022T result; \5023if (PROCNAN) { \5024result = FPProcessNaNs(op1, op2); \5025if (!IsNaN(result)) { \5026result = OP(op1, op2); \5027} \5028} else { \5029result = OP(op1, op2); \5030} \5031dst.SetFloat(vform, i, result); \5032} \5033return dst; \5034} \5035\5036LogicVRegister Simulator::FN(VectorFormat vform, \5037LogicVRegister dst, \5038const LogicVRegister& src1, \5039const LogicVRegister& src2) { \5040if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \5041FN<SimFloat16>(vform, dst, src1, src2); \5042} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \5043FN<float>(vform, dst, src1, src2); \5044} else { \5045VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \5046FN<double>(vform, dst, src1, src2); \5047} \5048return dst; \5049}5050NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)5051#undef 
DEFINE_NEON_FP_VECTOR_OP505250535054LogicVRegister Simulator::fnmul(VectorFormat vform,5055LogicVRegister dst,5056const LogicVRegister& src1,5057const LogicVRegister& src2) {5058SimVRegister temp;5059LogicVRegister product = fmul(vform, temp, src1, src2);5060return fneg(vform, dst, product);5061}506250635064template <typename T>5065LogicVRegister Simulator::frecps(VectorFormat vform,5066LogicVRegister dst,5067const LogicVRegister& src1,5068const LogicVRegister& src2) {5069dst.ClearForWrite(vform);5070for (int i = 0; i < LaneCountFromFormat(vform); i++) {5071T op1 = -src1.Float<T>(i);5072T op2 = src2.Float<T>(i);5073T result = FPProcessNaNs(op1, op2);5074dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));5075}5076return dst;5077}507850795080LogicVRegister Simulator::frecps(VectorFormat vform,5081LogicVRegister dst,5082const LogicVRegister& src1,5083const LogicVRegister& src2) {5084if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5085frecps<SimFloat16>(vform, dst, src1, src2);5086} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5087frecps<float>(vform, dst, src1, src2);5088} else {5089VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5090frecps<double>(vform, dst, src1, src2);5091}5092return dst;5093}509450955096template <typename T>5097LogicVRegister Simulator::frsqrts(VectorFormat vform,5098LogicVRegister dst,5099const LogicVRegister& src1,5100const LogicVRegister& src2) {5101dst.ClearForWrite(vform);5102for (int i = 0; i < LaneCountFromFormat(vform); i++) {5103T op1 = -src1.Float<T>(i);5104T op2 = src2.Float<T>(i);5105T result = FPProcessNaNs(op1, op2);5106dst.SetFloat(vform, i, IsNaN(result) ? 
result : FPRSqrtStepFused(op1, op2));5107}5108return dst;5109}511051115112LogicVRegister Simulator::frsqrts(VectorFormat vform,5113LogicVRegister dst,5114const LogicVRegister& src1,5115const LogicVRegister& src2) {5116if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5117frsqrts<SimFloat16>(vform, dst, src1, src2);5118} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5119frsqrts<float>(vform, dst, src1, src2);5120} else {5121VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5122frsqrts<double>(vform, dst, src1, src2);5123}5124return dst;5125}512651275128template <typename T>5129LogicVRegister Simulator::fcmp(VectorFormat vform,5130LogicVRegister dst,5131const LogicVRegister& src1,5132const LogicVRegister& src2,5133Condition cond) {5134dst.ClearForWrite(vform);5135for (int i = 0; i < LaneCountFromFormat(vform); i++) {5136bool result = false;5137T op1 = src1.Float<T>(i);5138T op2 = src2.Float<T>(i);5139bool unordered = IsNaN(FPProcessNaNs(op1, op2));51405141switch (cond) {5142case eq:5143result = (op1 == op2);5144break;5145case ge:5146result = (op1 >= op2);5147break;5148case gt:5149result = (op1 > op2);5150break;5151case le:5152result = (op1 <= op2);5153break;5154case lt:5155result = (op1 < op2);5156break;5157case ne:5158result = (op1 != op2);5159break;5160case uo:5161result = unordered;5162break;5163default:5164// Other conditions are defined in terms of those above.5165VIXL_UNREACHABLE();5166break;5167}51685169if (result && unordered) {5170// Only `uo` and `ne` can be true for unordered comparisons.5171VIXL_ASSERT((cond == uo) || (cond == ne));5172}51735174dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0);5175}5176return dst;5177}517851795180LogicVRegister Simulator::fcmp(VectorFormat vform,5181LogicVRegister dst,5182const LogicVRegister& src1,5183const LogicVRegister& src2,5184Condition cond) {5185if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5186fcmp<SimFloat16>(vform, dst, src1, src2, cond);5187} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5188fcmp<float>(vform, dst, src1, src2, cond);5189} else {5190VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5191fcmp<double>(vform, dst, src1, src2, cond);5192}5193return dst;5194}519551965197LogicVRegister Simulator::fcmp_zero(VectorFormat vform,5198LogicVRegister dst,5199const LogicVRegister& src,5200Condition cond) {5201SimVRegister temp;5202if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5203LogicVRegister zero_reg =5204dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));5205fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);5206} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5207LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));5208fcmp<float>(vform, dst, src, zero_reg, cond);5209} else {5210VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5211LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));5212fcmp<double>(vform, dst, src, zero_reg, cond);5213}5214return dst;5215}521652175218LogicVRegister Simulator::fabscmp(VectorFormat vform,5219LogicVRegister dst,5220const LogicVRegister& src1,5221const LogicVRegister& src2,5222Condition cond) {5223SimVRegister temp1, temp2;5224if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5225LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);5226LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);5227fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);5228} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5229LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);5230LogicVRegister abs_src2 = fabs_<float>(vform, temp2, 
src2);5231fcmp<float>(vform, dst, abs_src1, abs_src2, cond);5232} else {5233VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5234LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);5235LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);5236fcmp<double>(vform, dst, abs_src1, abs_src2, cond);5237}5238return dst;5239}524052415242template <typename T>5243LogicVRegister Simulator::fmla(VectorFormat vform,5244LogicVRegister dst,5245const LogicVRegister& srca,5246const LogicVRegister& src1,5247const LogicVRegister& src2) {5248dst.ClearForWrite(vform);5249for (int i = 0; i < LaneCountFromFormat(vform); i++) {5250T op1 = src1.Float<T>(i);5251T op2 = src2.Float<T>(i);5252T acc = srca.Float<T>(i);5253T result = FPMulAdd(acc, op1, op2);5254dst.SetFloat(vform, i, result);5255}5256return dst;5257}525852595260LogicVRegister Simulator::fmla(VectorFormat vform,5261LogicVRegister dst,5262const LogicVRegister& srca,5263const LogicVRegister& src1,5264const LogicVRegister& src2) {5265if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5266fmla<SimFloat16>(vform, dst, srca, src1, src2);5267} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5268fmla<float>(vform, dst, srca, src1, src2);5269} else {5270VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5271fmla<double>(vform, dst, srca, src1, src2);5272}5273return dst;5274}527552765277template <typename T>5278LogicVRegister Simulator::fmls(VectorFormat vform,5279LogicVRegister dst,5280const LogicVRegister& srca,5281const LogicVRegister& src1,5282const LogicVRegister& src2) {5283dst.ClearForWrite(vform);5284for (int i = 0; i < LaneCountFromFormat(vform); i++) {5285T op1 = -src1.Float<T>(i);5286T op2 = src2.Float<T>(i);5287T acc = srca.Float<T>(i);5288T result = FPMulAdd(acc, op1, op2);5289dst.SetFloat(i, result);5290}5291return dst;5292}529352945295LogicVRegister Simulator::fmls(VectorFormat vform,5296LogicVRegister dst,5297const LogicVRegister& srca,5298const LogicVRegister& src1,5299const 
LogicVRegister& src2) {5300if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5301fmls<SimFloat16>(vform, dst, srca, src1, src2);5302} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5303fmls<float>(vform, dst, srca, src1, src2);5304} else {5305VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5306fmls<double>(vform, dst, srca, src1, src2);5307}5308return dst;5309}531053115312LogicVRegister Simulator::fmlal(VectorFormat vform,5313LogicVRegister dst,5314const LogicVRegister& src1,5315const LogicVRegister& src2) {5316VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5317dst.ClearForWrite(vform);5318for (int i = 0; i < LaneCountFromFormat(vform); i++) {5319float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);5320float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);5321float acc = dst.Float<float>(i);5322float result = FPMulAdd(acc, op1, op2);5323dst.SetFloat(i, result);5324}5325return dst;5326}532753285329LogicVRegister Simulator::fmlal2(VectorFormat vform,5330LogicVRegister dst,5331const LogicVRegister& src1,5332const LogicVRegister& src2) {5333VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5334dst.ClearForWrite(vform);5335for (int i = 0; i < LaneCountFromFormat(vform); i++) {5336int src = i + LaneCountFromFormat(vform);5337float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);5338float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);5339float acc = dst.Float<float>(i);5340float result = FPMulAdd(acc, op1, op2);5341dst.SetFloat(i, result);5342}5343return dst;5344}534553465347LogicVRegister Simulator::fmlsl(VectorFormat vform,5348LogicVRegister dst,5349const LogicVRegister& src1,5350const LogicVRegister& src2) {5351VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5352dst.ClearForWrite(vform);5353for (int i = 0; i < LaneCountFromFormat(vform); i++) {5354float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);5355float op2 = 
FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);5356float acc = dst.Float<float>(i);5357float result = FPMulAdd(acc, op1, op2);5358dst.SetFloat(i, result);5359}5360return dst;5361}536253635364LogicVRegister Simulator::fmlsl2(VectorFormat vform,5365LogicVRegister dst,5366const LogicVRegister& src1,5367const LogicVRegister& src2) {5368VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5369dst.ClearForWrite(vform);5370for (int i = 0; i < LaneCountFromFormat(vform); i++) {5371int src = i + LaneCountFromFormat(vform);5372float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);5373float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);5374float acc = dst.Float<float>(i);5375float result = FPMulAdd(acc, op1, op2);5376dst.SetFloat(i, result);5377}5378return dst;5379}538053815382LogicVRegister Simulator::fmlal(VectorFormat vform,5383LogicVRegister dst,5384const LogicVRegister& src1,5385const LogicVRegister& src2,5386int index) {5387VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5388dst.ClearForWrite(vform);5389float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);5390for (int i = 0; i < LaneCountFromFormat(vform); i++) {5391float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);5392float acc = dst.Float<float>(i);5393float result = FPMulAdd(acc, op1, op2);5394dst.SetFloat(i, result);5395}5396return dst;5397}539853995400LogicVRegister Simulator::fmlal2(VectorFormat vform,5401LogicVRegister dst,5402const LogicVRegister& src1,5403const LogicVRegister& src2,5404int index) {5405VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5406dst.ClearForWrite(vform);5407float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);5408for (int i = 0; i < LaneCountFromFormat(vform); i++) {5409int src = i + LaneCountFromFormat(vform);5410float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);5411float acc = dst.Float<float>(i);5412float result = FPMulAdd(acc, op1, 
op2);5413dst.SetFloat(i, result);5414}5415return dst;5416}541754185419LogicVRegister Simulator::fmlsl(VectorFormat vform,5420LogicVRegister dst,5421const LogicVRegister& src1,5422const LogicVRegister& src2,5423int index) {5424VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5425dst.ClearForWrite(vform);5426float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);5427for (int i = 0; i < LaneCountFromFormat(vform); i++) {5428float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);5429float acc = dst.Float<float>(i);5430float result = FPMulAdd(acc, op1, op2);5431dst.SetFloat(i, result);5432}5433return dst;5434}543554365437LogicVRegister Simulator::fmlsl2(VectorFormat vform,5438LogicVRegister dst,5439const LogicVRegister& src1,5440const LogicVRegister& src2,5441int index) {5442VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);5443dst.ClearForWrite(vform);5444float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);5445for (int i = 0; i < LaneCountFromFormat(vform); i++) {5446int src = i + LaneCountFromFormat(vform);5447float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);5448float acc = dst.Float<float>(i);5449float result = FPMulAdd(acc, op1, op2);5450dst.SetFloat(i, result);5451}5452return dst;5453}545454555456template <typename T>5457LogicVRegister Simulator::fneg(VectorFormat vform,5458LogicVRegister dst,5459const LogicVRegister& src) {5460dst.ClearForWrite(vform);5461for (int i = 0; i < LaneCountFromFormat(vform); i++) {5462T op = src.Float<T>(i);5463op = -op;5464dst.SetFloat(i, op);5465}5466return dst;5467}546854695470LogicVRegister Simulator::fneg(VectorFormat vform,5471LogicVRegister dst,5472const LogicVRegister& src) {5473if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5474fneg<SimFloat16>(vform, dst, src);5475} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5476fneg<float>(vform, dst, src);5477} else {5478VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == 
kDRegSize);5479fneg<double>(vform, dst, src);5480}5481return dst;5482}548354845485template <typename T>5486LogicVRegister Simulator::fabs_(VectorFormat vform,5487LogicVRegister dst,5488const LogicVRegister& src) {5489dst.ClearForWrite(vform);5490for (int i = 0; i < LaneCountFromFormat(vform); i++) {5491T op = src.Float<T>(i);5492if (copysign(1.0, op) < 0.0) {5493op = -op;5494}5495dst.SetFloat(i, op);5496}5497return dst;5498}549955005501LogicVRegister Simulator::fabs_(VectorFormat vform,5502LogicVRegister dst,5503const LogicVRegister& src) {5504if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5505fabs_<SimFloat16>(vform, dst, src);5506} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5507fabs_<float>(vform, dst, src);5508} else {5509VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5510fabs_<double>(vform, dst, src);5511}5512return dst;5513}551455155516LogicVRegister Simulator::fabd(VectorFormat vform,5517LogicVRegister dst,5518const LogicVRegister& src1,5519const LogicVRegister& src2) {5520SimVRegister temp;5521fsub(vform, temp, src1, src2);5522fabs_(vform, dst, temp);5523return dst;5524}552555265527LogicVRegister Simulator::fsqrt(VectorFormat vform,5528LogicVRegister dst,5529const LogicVRegister& src) {5530dst.ClearForWrite(vform);5531if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5532for (int i = 0; i < LaneCountFromFormat(vform); i++) {5533SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));5534dst.SetFloat(i, result);5535}5536} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5537for (int i = 0; i < LaneCountFromFormat(vform); i++) {5538float result = FPSqrt(src.Float<float>(i));5539dst.SetFloat(i, result);5540}5541} else {5542VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5543for (int i = 0; i < LaneCountFromFormat(vform); i++) {5544double result = FPSqrt(src.Float<double>(i));5545dst.SetFloat(i, result);5546}5547}5548return dst;5549}555055515552#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \5553LogicVRegister 
Simulator::FNP(VectorFormat vform, \5554LogicVRegister dst, \5555const LogicVRegister& src1, \5556const LogicVRegister& src2) { \5557SimVRegister temp1, temp2; \5558uzp1(vform, temp1, src1, src2); \5559uzp2(vform, temp2, src1, src2); \5560FN(vform, dst, temp1, temp2); \5561if (IsSVEFormat(vform)) { \5562interleave_top_bottom(vform, dst, dst); \5563} \5564return dst; \5565} \5566\5567LogicVRegister Simulator::FNP(VectorFormat vform, \5568LogicVRegister dst, \5569const LogicVRegister& src) { \5570if (vform == kFormatH) { \5571SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \5572SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \5573dst.SetUint(vform, 0, Float16ToRawbits(result)); \5574} else if (vform == kFormatS) { \5575float result = OP(src.Float<float>(0), src.Float<float>(1)); \5576dst.SetFloat(0, result); \5577} else { \5578VIXL_ASSERT(vform == kFormatD); \5579double result = OP(src.Float<double>(0), src.Float<double>(1)); \5580dst.SetFloat(0, result); \5581} \5582dst.ClearForWrite(vform); \5583return dst; \5584}5585NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)5586#undef DEFINE_NEON_FP_PAIR_OP55875588template <typename T>5589LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,5590LogicVRegister dst,5591const LogicVRegister& src,5592typename TFPPairOp<T>::type fn,5593uint64_t inactive_value) {5594int lane_count = LaneCountFromFormat(vform);5595T result[kZRegMaxSizeInBytes / sizeof(T)];5596// Copy the source vector into a working array. Initialise the unused elements5597// at the end of the array to the same value that a false predicate would set.5598for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {5599result[i] = (i < lane_count)5600? 
src.Float<T>(i)5601: RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);5602}56035604// Pairwise reduce the elements to a single value, using the pair op function5605// argument.5606for (int step = 1; step < lane_count; step *= 2) {5607for (int i = 0; i < lane_count; i += step * 2) {5608result[i] = (this->*fn)(result[i], result[i + step]);5609}5610}5611dst.ClearForWrite(ScalarFormatFromFormat(vform));5612dst.SetFloat<T>(0, result[0]);5613return dst;5614}56155616LogicVRegister Simulator::FPPairedAcrossHelper(5617VectorFormat vform,5618LogicVRegister dst,5619const LogicVRegister& src,5620typename TFPPairOp<SimFloat16>::type fn16,5621typename TFPPairOp<float>::type fn32,5622typename TFPPairOp<double>::type fn64,5623uint64_t inactive_value) {5624switch (LaneSizeInBitsFromFormat(vform)) {5625case kHRegSize:5626return FPPairedAcrossHelper<SimFloat16>(vform,5627dst,5628src,5629fn16,5630inactive_value);5631case kSRegSize:5632return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);5633default:5634VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5635return FPPairedAcrossHelper<double>(vform,5636dst,5637src,5638fn64,5639inactive_value);5640}5641}56425643LogicVRegister Simulator::faddv(VectorFormat vform,5644LogicVRegister dst,5645const LogicVRegister& src) {5646return FPPairedAcrossHelper(vform,5647dst,5648src,5649&Simulator::FPAdd<SimFloat16>,5650&Simulator::FPAdd<float>,5651&Simulator::FPAdd<double>,56520);5653}56545655LogicVRegister Simulator::fmaxv(VectorFormat vform,5656LogicVRegister dst,5657const LogicVRegister& src) {5658int lane_size = LaneSizeInBitsFromFormat(vform);5659uint64_t inactive_value =5660FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);5661return FPPairedAcrossHelper(vform,5662dst,5663src,5664&Simulator::FPMax<SimFloat16>,5665&Simulator::FPMax<float>,5666&Simulator::FPMax<double>,5667inactive_value);5668}566956705671LogicVRegister Simulator::fminv(VectorFormat vform,5672LogicVRegister dst,5673const LogicVRegister& src) 
{5674int lane_size = LaneSizeInBitsFromFormat(vform);5675uint64_t inactive_value =5676FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);5677return FPPairedAcrossHelper(vform,5678dst,5679src,5680&Simulator::FPMin<SimFloat16>,5681&Simulator::FPMin<float>,5682&Simulator::FPMin<double>,5683inactive_value);5684}568556865687LogicVRegister Simulator::fmaxnmv(VectorFormat vform,5688LogicVRegister dst,5689const LogicVRegister& src) {5690int lane_size = LaneSizeInBitsFromFormat(vform);5691uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);5692return FPPairedAcrossHelper(vform,5693dst,5694src,5695&Simulator::FPMaxNM<SimFloat16>,5696&Simulator::FPMaxNM<float>,5697&Simulator::FPMaxNM<double>,5698inactive_value);5699}570057015702LogicVRegister Simulator::fminnmv(VectorFormat vform,5703LogicVRegister dst,5704const LogicVRegister& src) {5705int lane_size = LaneSizeInBitsFromFormat(vform);5706uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);5707return FPPairedAcrossHelper(vform,5708dst,5709src,5710&Simulator::FPMinNM<SimFloat16>,5711&Simulator::FPMinNM<float>,5712&Simulator::FPMinNM<double>,5713inactive_value);5714}571557165717LogicVRegister Simulator::fmul(VectorFormat vform,5718LogicVRegister dst,5719const LogicVRegister& src1,5720const LogicVRegister& src2,5721int index) {5722dst.ClearForWrite(vform);5723SimVRegister temp;5724if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5725LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);5726fmul<SimFloat16>(vform, dst, src1, index_reg);5727} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5728LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);5729fmul<float>(vform, dst, src1, index_reg);5730} else {5731VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5732LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);5733fmul<double>(vform, dst, src1, index_reg);5734}5735return dst;5736}573757385739LogicVRegister 
Simulator::fmla(VectorFormat vform,5740LogicVRegister dst,5741const LogicVRegister& src1,5742const LogicVRegister& src2,5743int index) {5744dst.ClearForWrite(vform);5745SimVRegister temp;5746if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5747LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);5748fmla<SimFloat16>(vform, dst, dst, src1, index_reg);5749} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5750LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);5751fmla<float>(vform, dst, dst, src1, index_reg);5752} else {5753VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5754LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);5755fmla<double>(vform, dst, dst, src1, index_reg);5756}5757return dst;5758}575957605761LogicVRegister Simulator::fmls(VectorFormat vform,5762LogicVRegister dst,5763const LogicVRegister& src1,5764const LogicVRegister& src2,5765int index) {5766dst.ClearForWrite(vform);5767SimVRegister temp;5768if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5769LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);5770fmls<SimFloat16>(vform, dst, dst, src1, index_reg);5771} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5772LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);5773fmls<float>(vform, dst, dst, src1, index_reg);5774} else {5775VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5776LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);5777fmls<double>(vform, dst, dst, src1, index_reg);5778}5779return dst;5780}578157825783LogicVRegister Simulator::fmulx(VectorFormat vform,5784LogicVRegister dst,5785const LogicVRegister& src1,5786const LogicVRegister& src2,5787int index) {5788dst.ClearForWrite(vform);5789SimVRegister temp;5790if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5791LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);5792fmulx<SimFloat16>(vform, dst, src1, index_reg);5793} else if 
(LaneSizeInBitsFromFormat(vform) == kSRegSize) {5794LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);5795fmulx<float>(vform, dst, src1, index_reg);5796} else {5797VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5798LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);5799fmulx<double>(vform, dst, src1, index_reg);5800}5801return dst;5802}580358045805LogicVRegister Simulator::frint(VectorFormat vform,5806LogicVRegister dst,5807const LogicVRegister& src,5808FPRounding rounding_mode,5809bool inexact_exception,5810FrintMode frint_mode) {5811dst.ClearForWrite(vform);5812if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {5813VIXL_ASSERT(frint_mode == kFrintToInteger);5814for (int i = 0; i < LaneCountFromFormat(vform); i++) {5815SimFloat16 input = src.Float<SimFloat16>(i);5816SimFloat16 rounded = FPRoundInt(input, rounding_mode);5817if (inexact_exception && !IsNaN(input) && (input != rounded)) {5818FPProcessException();5819}5820dst.SetFloat<SimFloat16>(i, rounded);5821}5822} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5823for (int i = 0; i < LaneCountFromFormat(vform); i++) {5824float input = src.Float<float>(i);5825float rounded = FPRoundInt(input, rounding_mode, frint_mode);58265827if (inexact_exception && !IsNaN(input) && (input != rounded)) {5828FPProcessException();5829}5830dst.SetFloat<float>(i, rounded);5831}5832} else {5833VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5834for (int i = 0; i < LaneCountFromFormat(vform); i++) {5835double input = src.Float<double>(i);5836double rounded = FPRoundInt(input, rounding_mode, frint_mode);5837if (inexact_exception && !IsNaN(input) && (input != rounded)) {5838FPProcessException();5839}5840dst.SetFloat<double>(i, rounded);5841}5842}5843return dst;5844}58455846LogicVRegister Simulator::fcvt(VectorFormat dst_vform,5847VectorFormat src_vform,5848LogicVRegister dst,5849const LogicPRegister& pg,5850const LogicVRegister& src) {5851unsigned dst_data_size_in_bits = 
LaneSizeInBitsFromFormat(dst_vform);5852unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);5853VectorFormat vform = SVEFormatFromLaneSizeInBits(5854std::max(dst_data_size_in_bits, src_data_size_in_bits));58555856for (int i = 0; i < LaneCountFromFormat(vform); i++) {5857if (!pg.IsActive(vform, i)) continue;58585859uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,58600,5861src.Uint(vform, i));5862double dst_value =5863RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);58645865uint64_t dst_raw_bits =5866FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);58675868dst.SetUint(vform, i, dst_raw_bits);5869}58705871return dst;5872}58735874LogicVRegister Simulator::fcvts(VectorFormat vform,5875unsigned dst_data_size_in_bits,5876unsigned src_data_size_in_bits,5877LogicVRegister dst,5878const LogicPRegister& pg,5879const LogicVRegister& src,5880FPRounding round,5881int fbits) {5882VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);5883VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);58845885for (int i = 0; i < LaneCountFromFormat(vform); i++) {5886if (!pg.IsActive(vform, i)) continue;58875888uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,58890,5890src.Uint(vform, i));5891double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *5892std::pow(2.0, fbits);58935894switch (dst_data_size_in_bits) {5895case kHRegSize:5896dst.SetInt(vform, i, FPToInt16(result, round));5897break;5898case kSRegSize:5899dst.SetInt(vform, i, FPToInt32(result, round));5900break;5901case kDRegSize:5902dst.SetInt(vform, i, FPToInt64(result, round));5903break;5904default:5905VIXL_UNIMPLEMENTED();5906break;5907}5908}59095910return dst;5911}59125913LogicVRegister Simulator::fcvts(VectorFormat vform,5914LogicVRegister dst,5915const LogicVRegister& src,5916FPRounding round,5917int fbits) {5918dst.ClearForWrite(vform);5919return 
fcvts(vform,5920LaneSizeInBitsFromFormat(vform),5921LaneSizeInBitsFromFormat(vform),5922dst,5923GetPTrue(),5924src,5925round,5926fbits);5927}59285929LogicVRegister Simulator::fcvtu(VectorFormat vform,5930unsigned dst_data_size_in_bits,5931unsigned src_data_size_in_bits,5932LogicVRegister dst,5933const LogicPRegister& pg,5934const LogicVRegister& src,5935FPRounding round,5936int fbits) {5937VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);5938VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);59395940for (int i = 0; i < LaneCountFromFormat(vform); i++) {5941if (!pg.IsActive(vform, i)) continue;59425943uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,59440,5945src.Uint(vform, i));5946double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *5947std::pow(2.0, fbits);59485949switch (dst_data_size_in_bits) {5950case kHRegSize:5951dst.SetUint(vform, i, FPToUInt16(result, round));5952break;5953case kSRegSize:5954dst.SetUint(vform, i, FPToUInt32(result, round));5955break;5956case kDRegSize:5957dst.SetUint(vform, i, FPToUInt64(result, round));5958break;5959default:5960VIXL_UNIMPLEMENTED();5961break;5962}5963}59645965return dst;5966}59675968LogicVRegister Simulator::fcvtu(VectorFormat vform,5969LogicVRegister dst,5970const LogicVRegister& src,5971FPRounding round,5972int fbits) {5973dst.ClearForWrite(vform);5974return fcvtu(vform,5975LaneSizeInBitsFromFormat(vform),5976LaneSizeInBitsFromFormat(vform),5977dst,5978GetPTrue(),5979src,5980round,5981fbits);5982}59835984LogicVRegister Simulator::fcvtl(VectorFormat vform,5985LogicVRegister dst,5986const LogicVRegister& src) {5987dst.ClearForWrite(vform);5988if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {5989for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {5990// TODO: Full support for SimFloat16 in SimRegister(s).5991dst.SetFloat(i,5992FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),5993ReadDN()));5994}5995} else 
{5996VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);5997for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {5998dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));5999}6000}6001return dst;6002}600360046005LogicVRegister Simulator::fcvtl2(VectorFormat vform,6006LogicVRegister dst,6007const LogicVRegister& src) {6008dst.ClearForWrite(vform);6009int lane_count = LaneCountFromFormat(vform);6010if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {6011for (int i = 0; i < lane_count; i++) {6012// TODO: Full support for SimFloat16 in SimRegister(s).6013dst.SetFloat(i,6014FPToFloat(RawbitsToFloat16(6015src.Float<uint16_t>(i + lane_count)),6016ReadDN()));6017}6018} else {6019VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);6020for (int i = 0; i < lane_count; i++) {6021dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));6022}6023}6024return dst;6025}602660276028LogicVRegister Simulator::fcvtn(VectorFormat vform,6029LogicVRegister dst,6030const LogicVRegister& src) {6031SimVRegister tmp;6032LogicVRegister srctmp = mov(kFormat2D, tmp, src);6033dst.ClearForWrite(vform);6034if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6035for (int i = 0; i < LaneCountFromFormat(vform); i++) {6036dst.SetFloat(i,6037Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),6038FPTieEven,6039ReadDN())));6040}6041} else {6042VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);6043for (int i = 0; i < LaneCountFromFormat(vform); i++) {6044dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));6045}6046}6047return dst;6048}604960506051LogicVRegister Simulator::fcvtn2(VectorFormat vform,6052LogicVRegister dst,6053const LogicVRegister& src) {6054dst.ClearForWrite(vform);6055int lane_count = LaneCountFromFormat(vform) / 2;6056if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6057for (int i = lane_count - 1; i >= 0; i--) {6058dst.SetFloat(i + lane_count,6059Float16ToRawbits(6060FPToFloat16(src.Float<float>(i), FPTieEven, 
ReadDN())));6061}6062} else {6063VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);6064for (int i = lane_count - 1; i >= 0; i--) {6065dst.SetFloat(i + lane_count,6066FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));6067}6068}6069return dst;6070}607160726073LogicVRegister Simulator::fcvtxn(VectorFormat vform,6074LogicVRegister dst,6075const LogicVRegister& src) {6076SimVRegister tmp;6077LogicVRegister srctmp = mov(kFormat2D, tmp, src);6078int input_lane_count = LaneCountFromFormat(vform);6079if (IsSVEFormat(vform)) {6080mov(kFormatVnB, tmp, src);6081input_lane_count /= 2;6082}60836084dst.ClearForWrite(vform);6085VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);60866087for (int i = 0; i < input_lane_count; i++) {6088dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));6089}6090return dst;6091}609260936094LogicVRegister Simulator::fcvtxn2(VectorFormat vform,6095LogicVRegister dst,6096const LogicVRegister& src) {6097VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);6098dst.ClearForWrite(vform);6099int lane_count = LaneCountFromFormat(vform) / 2;6100for (int i = lane_count - 1; i >= 0; i--) {6101dst.SetFloat(i + lane_count,6102FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));6103}6104return dst;6105}610661076108// Based on reference C function recip_sqrt_estimate from ARM ARM.6109double Simulator::recip_sqrt_estimate(double a) {6110int quot0, quot1, s;6111double r;6112if (a < 0.5) {6113quot0 = static_cast<int>(a * 512.0);6114r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);6115} else {6116quot1 = static_cast<int>(a * 256.0);6117r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);6118}6119s = static_cast<int>(256.0 * r + 0.5);6120return static_cast<double>(s) / 256.0;6121}612261236124static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {6125return ExtractUnsignedBitfield64(start_bit, end_bit, val);6126}612761286129template <typename T>6130T Simulator::FPRecipSqrtEstimate(T op) 
{6131if (IsNaN(op)) {6132return FPProcessNaN(op);6133} else if (op == 0.0) {6134if (copysign(1.0, op) < 0.0) {6135return kFP64NegativeInfinity;6136} else {6137return kFP64PositiveInfinity;6138}6139} else if (copysign(1.0, op) < 0.0) {6140FPProcessException();6141return FPDefaultNaN<T>();6142} else if (IsInf(op)) {6143return 0.0;6144} else {6145uint64_t fraction;6146int exp, result_exp;61476148if (IsFloat16<T>()) {6149exp = Float16Exp(op);6150fraction = Float16Mantissa(op);6151fraction <<= 42;6152} else if (IsFloat32<T>()) {6153exp = FloatExp(op);6154fraction = FloatMantissa(op);6155fraction <<= 29;6156} else {6157VIXL_ASSERT(IsFloat64<T>());6158exp = DoubleExp(op);6159fraction = DoubleMantissa(op);6160}61616162if (exp == 0) {6163while (Bits(fraction, 51, 51) == 0) {6164fraction = Bits(fraction, 50, 0) << 1;6165exp -= 1;6166}6167fraction = Bits(fraction, 50, 0) << 1;6168}61696170double scaled;6171if (Bits(exp, 0, 0) == 0) {6172scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);6173} else {6174scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);6175}61766177if (IsFloat16<T>()) {6178result_exp = (44 - exp) / 2;6179} else if (IsFloat32<T>()) {6180result_exp = (380 - exp) / 2;6181} else {6182VIXL_ASSERT(IsFloat64<T>());6183result_exp = (3068 - exp) / 2;6184}61856186uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));61876188if (IsFloat16<T>()) {6189uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));6190uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));6191return Float16Pack(0, exp_bits, est_bits);6192} else if (IsFloat32<T>()) {6193uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));6194uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));6195return FloatPack(0, exp_bits, est_bits);6196} else {6197VIXL_ASSERT(IsFloat64<T>());6198return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));6199}6200}6201}620262036204LogicVRegister Simulator::frsqrte(VectorFormat 
vform,6205LogicVRegister dst,6206const LogicVRegister& src) {6207dst.ClearForWrite(vform);6208if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6209for (int i = 0; i < LaneCountFromFormat(vform); i++) {6210SimFloat16 input = src.Float<SimFloat16>(i);6211dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));6212}6213} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {6214for (int i = 0; i < LaneCountFromFormat(vform); i++) {6215float input = src.Float<float>(i);6216dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));6217}6218} else {6219VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);6220for (int i = 0; i < LaneCountFromFormat(vform); i++) {6221double input = src.Float<double>(i);6222dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));6223}6224}6225return dst;6226}62276228template <typename T>6229T Simulator::FPRecipEstimate(T op, FPRounding rounding) {6230uint32_t sign;62316232if (IsFloat16<T>()) {6233sign = Float16Sign(op);6234} else if (IsFloat32<T>()) {6235sign = FloatSign(op);6236} else {6237VIXL_ASSERT(IsFloat64<T>());6238sign = DoubleSign(op);6239}62406241if (IsNaN(op)) {6242return FPProcessNaN(op);6243} else if (IsInf(op)) {6244return (sign == 1) ? -0.0 : 0.0;6245} else if (op == 0.0) {6246FPProcessException(); // FPExc_DivideByZero exception.6247return (sign == 1) ? 
kFP64NegativeInfinity : kFP64PositiveInfinity;6248} else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||6249(IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||6250(IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {6251bool overflow_to_inf = false;6252switch (rounding) {6253case FPTieEven:6254overflow_to_inf = true;6255break;6256case FPPositiveInfinity:6257overflow_to_inf = (sign == 0);6258break;6259case FPNegativeInfinity:6260overflow_to_inf = (sign == 1);6261break;6262case FPZero:6263overflow_to_inf = false;6264break;6265default:6266break;6267}6268FPProcessException(); // FPExc_Overflow and FPExc_Inexact.6269if (overflow_to_inf) {6270return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;6271} else {6272// Return FPMaxNormal(sign).6273if (IsFloat16<T>()) {6274return Float16Pack(sign, 0x1f, 0x3ff);6275} else if (IsFloat32<T>()) {6276return FloatPack(sign, 0xfe, 0x07fffff);6277} else {6278VIXL_ASSERT(IsFloat64<T>());6279return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);6280}6281}6282} else {6283uint64_t fraction;6284int exp, result_exp;62856286if (IsFloat16<T>()) {6287sign = Float16Sign(op);6288exp = Float16Exp(op);6289fraction = Float16Mantissa(op);6290fraction <<= 42;6291} else if (IsFloat32<T>()) {6292sign = FloatSign(op);6293exp = FloatExp(op);6294fraction = FloatMantissa(op);6295fraction <<= 29;6296} else {6297VIXL_ASSERT(IsFloat64<T>());6298sign = DoubleSign(op);6299exp = DoubleExp(op);6300fraction = DoubleMantissa(op);6301}63026303if (exp == 0) {6304if (Bits(fraction, 51, 51) == 0) {6305exp -= 1;6306fraction = Bits(fraction, 49, 0) << 2;6307} else {6308fraction = Bits(fraction, 50, 0) << 1;6309}6310}63116312double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);63136314if (IsFloat16<T>()) {6315result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.6316} else if (IsFloat32<T>()) {6317result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.6318} else 
{6319VIXL_ASSERT(IsFloat64<T>());6320result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.6321}63226323double estimate = recip_estimate(scaled);63246325fraction = DoubleMantissa(estimate);6326if (result_exp == 0) {6327fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);6328} else if (result_exp == -1) {6329fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);6330result_exp = 0;6331}6332if (IsFloat16<T>()) {6333uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));6334uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));6335return Float16Pack(sign, exp_bits, frac_bits);6336} else if (IsFloat32<T>()) {6337uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));6338uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));6339return FloatPack(sign, exp_bits, frac_bits);6340} else {6341VIXL_ASSERT(IsFloat64<T>());6342return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));6343}6344}6345}634663476348LogicVRegister Simulator::frecpe(VectorFormat vform,6349LogicVRegister dst,6350const LogicVRegister& src,6351FPRounding round) {6352dst.ClearForWrite(vform);6353if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6354for (int i = 0; i < LaneCountFromFormat(vform); i++) {6355SimFloat16 input = src.Float<SimFloat16>(i);6356dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));6357}6358} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {6359for (int i = 0; i < LaneCountFromFormat(vform); i++) {6360float input = src.Float<float>(i);6361dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));6362}6363} else {6364VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);6365for (int i = 0; i < LaneCountFromFormat(vform); i++) {6366double input = src.Float<double>(i);6367dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));6368}6369}6370return dst;6371}637263736374LogicVRegister Simulator::ursqrte(VectorFormat vform,6375LogicVRegister dst,6376const LogicVRegister& src) 
{6377dst.ClearForWrite(vform);6378uint64_t operand;6379uint32_t result;6380double dp_operand, dp_result;6381for (int i = 0; i < LaneCountFromFormat(vform); i++) {6382operand = src.Uint(vform, i);6383if (operand <= 0x3FFFFFFF) {6384result = 0xFFFFFFFF;6385} else {6386dp_operand = operand * std::pow(2.0, -32);6387dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);6388result = static_cast<uint32_t>(dp_result);6389}6390dst.SetUint(vform, i, result);6391}6392return dst;6393}639463956396// Based on reference C function recip_estimate from ARM ARM.6397double Simulator::recip_estimate(double a) {6398int q, s;6399double r;6400q = static_cast<int>(a * 512.0);6401r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);6402s = static_cast<int>(256.0 * r + 0.5);6403return static_cast<double>(s) / 256.0;6404}640564066407LogicVRegister Simulator::urecpe(VectorFormat vform,6408LogicVRegister dst,6409const LogicVRegister& src) {6410dst.ClearForWrite(vform);6411uint64_t operand;6412uint32_t result;6413double dp_operand, dp_result;6414for (int i = 0; i < LaneCountFromFormat(vform); i++) {6415operand = src.Uint(vform, i);6416if (operand <= 0x7FFFFFFF) {6417result = 0xFFFFFFFF;6418} else {6419dp_operand = operand * std::pow(2.0, -32);6420dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);6421result = static_cast<uint32_t>(dp_result);6422}6423dst.SetUint(vform, i, result);6424}6425return dst;6426}64276428LogicPRegister Simulator::pfalse(LogicPRegister dst) {6429dst.Clear();6430return dst;6431}64326433LogicPRegister Simulator::pfirst(LogicPRegister dst,6434const LogicPRegister& pg,6435const LogicPRegister& src) {6436int first_pg = GetFirstActive(kFormatVnB, pg);6437VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));6438mov(dst, src);6439if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);6440return dst;6441}64426443LogicPRegister Simulator::ptrue(VectorFormat vform,6444LogicPRegister dst,6445int pattern) {6446int count = 
GetPredicateConstraintLaneCount(vform, pattern);6447for (int i = 0; i < LaneCountFromFormat(vform); i++) {6448dst.SetActive(vform, i, i < count);6449}6450return dst;6451}64526453LogicPRegister Simulator::pnext(VectorFormat vform,6454LogicPRegister dst,6455const LogicPRegister& pg,6456const LogicPRegister& src) {6457int next = GetLastActive(vform, src) + 1;6458while (next < LaneCountFromFormat(vform)) {6459if (pg.IsActive(vform, next)) break;6460next++;6461}64626463for (int i = 0; i < LaneCountFromFormat(vform); i++) {6464dst.SetActive(vform, i, (i == next));6465}6466return dst;6467}64686469template <typename T>6470LogicVRegister Simulator::frecpx(VectorFormat vform,6471LogicVRegister dst,6472const LogicVRegister& src) {6473dst.ClearForWrite(vform);6474for (int i = 0; i < LaneCountFromFormat(vform); i++) {6475T op = src.Float<T>(i);6476T result;6477if (IsNaN(op)) {6478result = FPProcessNaN(op);6479} else {6480int exp;6481uint32_t sign;6482if (IsFloat16<T>()) {6483sign = Float16Sign(op);6484exp = Float16Exp(op);6485exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));6486result = Float16Pack(sign, exp, 0);6487} else if (IsFloat32<T>()) {6488sign = FloatSign(op);6489exp = FloatExp(op);6490exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));6491result = FloatPack(sign, exp, 0);6492} else {6493VIXL_ASSERT(IsFloat64<T>());6494sign = DoubleSign(op);6495exp = DoubleExp(op);6496exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));6497result = DoublePack(sign, exp, 0);6498}6499}6500dst.SetFloat(i, result);6501}6502return dst;6503}650465056506LogicVRegister Simulator::frecpx(VectorFormat vform,6507LogicVRegister dst,6508const LogicVRegister& src) {6509if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6510frecpx<SimFloat16>(vform, dst, src);6511} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {6512frecpx<float>(vform, dst, src);6513} else {6514VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);6515frecpx<double>(vform, dst, src);6516}6517return dst;6518}65196520LogicVRegister Simulator::flogb(VectorFormat vform,6521LogicVRegister dst,6522const LogicVRegister& src) {6523for (int i = 0; i < LaneCountFromFormat(vform); i++) {6524double op = 0.0;6525switch (vform) {6526case kFormatVnH:6527op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);6528break;6529case kFormatVnS:6530op = src.Float<float>(i);6531break;6532case kFormatVnD:6533op = src.Float<double>(i);6534break;6535default:6536VIXL_UNREACHABLE();6537}65386539switch (std::fpclassify(op)) {6540case FP_INFINITE:6541dst.SetInt(vform, i, MaxIntFromFormat(vform));6542break;6543case FP_NAN:6544case FP_ZERO:6545dst.SetInt(vform, i, MinIntFromFormat(vform));6546break;6547case FP_SUBNORMAL: {6548// DoubleMantissa returns the mantissa of its input, leaving 12 zero6549// bits where the sign and exponent would be. 
We subtract 12 to6550// find the number of leading zero bits in the mantissa itself.6551int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;6552// Log2 of a subnormal is the lowest exponent a normal number can6553// represent, together with the zeros in the mantissa.6554dst.SetInt(vform, i, -1023 - mant_zero_count);6555break;6556}6557case FP_NORMAL:6558// Log2 of a normal number is the exponent minus the bias.6559dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);6560break;6561}6562}6563return dst;6564}65656566LogicVRegister Simulator::ftsmul(VectorFormat vform,6567LogicVRegister dst,6568const LogicVRegister& src1,6569const LogicVRegister& src2) {6570SimVRegister maybe_neg_src1;65716572// The bottom bit of src2 controls the sign of the result. Use it to6573// conditionally invert the sign of one `fmul` operand.6574shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);6575eor(vform, maybe_neg_src1, maybe_neg_src1, src1);65766577// Multiply src1 by the modified neg_src1, which is potentially its negation.6578// In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,6579// rather than neg_src1, must be the first source argument.6580fmul(vform, dst, src1, maybe_neg_src1);65816582return dst;6583}65846585LogicVRegister Simulator::ftssel(VectorFormat vform,6586LogicVRegister dst,6587const LogicVRegister& src1,6588const LogicVRegister& src2) {6589unsigned lane_bits = LaneSizeInBitsFromFormat(vform);6590uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);6591uint64_t one;65926593if (lane_bits == kHRegSize) {6594one = Float16ToRawbits(Float16(1.0));6595} else if (lane_bits == kSRegSize) {6596one = FloatToRawbits(1.0);6597} else {6598VIXL_ASSERT(lane_bits == kDRegSize);6599one = DoubleToRawbits(1.0);6600}66016602for (int i = 0; i < LaneCountFromFormat(vform); i++) {6603// Use integer accessors for this operation, as this is a data manipulation6604// task requiring no calculation.6605uint64_t op = 
src1.Uint(vform, i);66066607// Only the bottom two bits of the src2 register are significant, indicating6608// the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 16609// determines the sign of the value written to dst.6610uint64_t q = src2.Uint(vform, i);6611if ((q & 1) == 1) op = one;6612if ((q & 2) == 2) op ^= sign_bit;66136614dst.SetUint(vform, i, op);6615}66166617return dst;6618}66196620template <typename T>6621LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,6622LogicVRegister dst,6623const LogicVRegister& src1,6624const LogicVRegister& src2,6625uint64_t coeff_pos,6626uint64_t coeff_neg) {6627SimVRegister zero;6628dup_immediate(kFormatVnB, zero, 0);66296630SimVRegister cf;6631SimVRegister cfn;6632dup_immediate(vform, cf, coeff_pos);6633dup_immediate(vform, cfn, coeff_neg);66346635// The specification requires testing the top bit of the raw value, rather6636// than the sign of the floating point number, so use an integer comparison6637// here.6638SimPRegister is_neg;6639SVEIntCompareVectorsHelper(lt,6640vform,6641is_neg,6642GetPTrue(),6643src2,6644zero,6645false,6646LeaveFlags);6647mov_merging(vform, cf, is_neg, cfn);66486649SimVRegister temp;6650fabs_<T>(vform, temp, src2);6651fmla<T>(vform, cf, cf, src1, temp);6652mov(vform, dst, cf);6653return dst;6654}665566566657LogicVRegister Simulator::ftmad(VectorFormat vform,6658LogicVRegister dst,6659const LogicVRegister& src1,6660const LogicVRegister& src2,6661unsigned index) {6662static const uint64_t ftmad_coeff16[] = {0x3c00,66630xb155,66640x2030,66650x0000,66660x0000,66670x0000,66680x0000,66690x0000,66700x3c00,66710xb800,66720x293a,66730x0000,66740x0000,66750x0000,66760x0000,66770x0000};66786679static const uint64_t ftmad_coeff32[] = {0x3f800000,66800xbe2aaaab,66810x3c088886,66820xb95008b9,66830x36369d6d,66840x00000000,66850x00000000,66860x00000000,66870x3f800000,66880xbf000000,66890x3d2aaaa6,66900xbab60705,66910x37cd37cc,66920x00000000,66930x00000000,66940x00000000};66956696static 
const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,66970xbfc5555555555543,66980x3f8111111110f30c,66990xbf2a01a019b92fc6,67000x3ec71de351f3d22b,67010xbe5ae5e2b60f7b91,67020x3de5d8408868552f,67030x0000000000000000,67040x3ff0000000000000,67050xbfe0000000000000,67060x3fa5555555555536,67070xbf56c16c16c13a0b,67080x3efa01a019b1e8d8,67090xbe927e4f7282f468,67100x3e21ee96d2641b13,67110xbda8f76380fbb401};6712VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));6713VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));6714VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));67156716if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6717FTMaddHelper<SimFloat16>(vform,6718dst,6719src1,6720src2,6721ftmad_coeff16[index],6722ftmad_coeff16[index + 8]);6723} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {6724FTMaddHelper<float>(vform,6725dst,6726src1,6727src2,6728ftmad_coeff32[index],6729ftmad_coeff32[index + 8]);6730} else {6731VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);6732FTMaddHelper<double>(vform,6733dst,6734src1,6735src2,6736ftmad_coeff64[index],6737ftmad_coeff64[index + 8]);6738}6739return dst;6740}67416742LogicVRegister Simulator::fexpa(VectorFormat vform,6743LogicVRegister dst,6744const LogicVRegister& src) {6745static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,67460x005d, 0x0075, 0x008e, 0x00a8,67470x00c2, 0x00dc, 0x00f8, 0x0114,67480x0130, 0x014d, 0x016b, 0x0189,67490x01a8, 0x01c8, 0x01e8, 0x0209,67500x022b, 0x024e, 0x0271, 0x0295,67510x02ba, 0x02e0, 0x0306, 0x032e,67520x0356, 0x037f, 0x03a9, 0x03d4};67536754static const uint64_t fexpa_coeff32[] =6755{0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,67560x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,67570x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,67580x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,67590x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 
0x36fd92, 0x38fbaf,67600x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,67610x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,67620x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,67630x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,67640x7d3e0c};67656766static const uint64_t fexpa_coeff64[] =6767{0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,67680X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,67690X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,67700X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,67710X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,67720X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,67730X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,67740X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,67750X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,67760X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,67770X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,67780X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,67790Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,67800Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,67810Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,67820Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};67836784unsigned lane_size = LaneSizeInBitsFromFormat(vform);6785int index_highbit = 5;6786int op_highbit, op_shift;6787const uint64_t* fexpa_coeff;67886789if (lane_size == kHRegSize) {6790index_highbit = 4;6791VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));6792fexpa_coeff = fexpa_coeff16;6793op_highbit = 9;6794op_shift = 10;6795} else if (lane_size == kSRegSize) {6796VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));6797fexpa_coeff = 
fexpa_coeff32;6798op_highbit = 13;6799op_shift = 23;6800} else {6801VIXL_ASSERT(lane_size == kDRegSize);6802VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));6803fexpa_coeff = fexpa_coeff64;6804op_highbit = 16;6805op_shift = 52;6806}68076808for (int i = 0; i < LaneCountFromFormat(vform); i++) {6809uint64_t op = src.Uint(vform, i);6810uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];6811result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);6812dst.SetUint(vform, i, result);6813}6814return dst;6815}68166817template <typename T>6818LogicVRegister Simulator::fscale(VectorFormat vform,6819LogicVRegister dst,6820const LogicVRegister& src1,6821const LogicVRegister& src2) {6822T two = T(2.0);6823for (int i = 0; i < LaneCountFromFormat(vform); i++) {6824T src1_val = src1.Float<T>(i);6825if (!IsNaN(src1_val)) {6826int64_t scale = src2.Int(vform, i);6827// TODO: this is a low-performance implementation, but it's simple and6828// less likely to be buggy. Consider replacing it with something faster.68296830// Scales outside of these bounds become infinity or zero, so there's no6831// point iterating further.6832scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);68336834// Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and6835// decrement scale until it's zero.6836while (scale-- > 0) {6837src1_val = FPMul(src1_val, two);6838}68396840// If scale is negative, divide by two and increment scale until it's6841// zero. 
Initially, scale is (src2 - 1), so we pre-increment.6842while (++scale < 0) {6843src1_val = FPDiv(src1_val, two);6844}6845}6846dst.SetFloat<T>(i, src1_val);6847}6848return dst;6849}68506851LogicVRegister Simulator::fscale(VectorFormat vform,6852LogicVRegister dst,6853const LogicVRegister& src1,6854const LogicVRegister& src2) {6855if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {6856fscale<SimFloat16>(vform, dst, src1, src2);6857} else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {6858fscale<float>(vform, dst, src1, src2);6859} else {6860VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);6861fscale<double>(vform, dst, src1, src2);6862}6863return dst;6864}68656866LogicVRegister Simulator::scvtf(VectorFormat vform,6867unsigned dst_data_size_in_bits,6868unsigned src_data_size_in_bits,6869LogicVRegister dst,6870const LogicPRegister& pg,6871const LogicVRegister& src,6872FPRounding round,6873int fbits) {6874VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);6875VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);6876dst.ClearForWrite(vform);68776878for (int i = 0; i < LaneCountFromFormat(vform); i++) {6879if (!pg.IsActive(vform, i)) continue;68806881int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,68820,6883src.Uint(vform, i));68846885switch (dst_data_size_in_bits) {6886case kHRegSize: {6887SimFloat16 result = FixedToFloat16(value, fbits, round);6888dst.SetUint(vform, i, Float16ToRawbits(result));6889break;6890}6891case kSRegSize: {6892float result = FixedToFloat(value, fbits, round);6893dst.SetUint(vform, i, FloatToRawbits(result));6894break;6895}6896case kDRegSize: {6897double result = FixedToDouble(value, fbits, round);6898dst.SetUint(vform, i, DoubleToRawbits(result));6899break;6900}6901default:6902VIXL_UNIMPLEMENTED();6903break;6904}6905}69066907return dst;6908}69096910LogicVRegister Simulator::scvtf(VectorFormat vform,6911LogicVRegister dst,6912const LogicVRegister& src,6913int fbits,6914FPRounding 
round) {6915return scvtf(vform,6916LaneSizeInBitsFromFormat(vform),6917LaneSizeInBitsFromFormat(vform),6918dst,6919GetPTrue(),6920src,6921round,6922fbits);6923}69246925LogicVRegister Simulator::ucvtf(VectorFormat vform,6926unsigned dst_data_size_in_bits,6927unsigned src_data_size_in_bits,6928LogicVRegister dst,6929const LogicPRegister& pg,6930const LogicVRegister& src,6931FPRounding round,6932int fbits) {6933VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);6934VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);6935dst.ClearForWrite(vform);69366937for (int i = 0; i < LaneCountFromFormat(vform); i++) {6938if (!pg.IsActive(vform, i)) continue;69396940uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,69410,6942src.Uint(vform, i));69436944switch (dst_data_size_in_bits) {6945case kHRegSize: {6946SimFloat16 result = UFixedToFloat16(value, fbits, round);6947dst.SetUint(vform, i, Float16ToRawbits(result));6948break;6949}6950case kSRegSize: {6951float result = UFixedToFloat(value, fbits, round);6952dst.SetUint(vform, i, FloatToRawbits(result));6953break;6954}6955case kDRegSize: {6956double result = UFixedToDouble(value, fbits, round);6957dst.SetUint(vform, i, DoubleToRawbits(result));6958break;6959}6960default:6961VIXL_UNIMPLEMENTED();6962break;6963}6964}69656966return dst;6967}69686969LogicVRegister Simulator::ucvtf(VectorFormat vform,6970LogicVRegister dst,6971const LogicVRegister& src,6972int fbits,6973FPRounding round) {6974return ucvtf(vform,6975LaneSizeInBitsFromFormat(vform),6976LaneSizeInBitsFromFormat(vform),6977dst,6978GetPTrue(),6979src,6980round,6981fbits);6982}69836984LogicVRegister Simulator::unpk(VectorFormat vform,6985LogicVRegister dst,6986const LogicVRegister& src,6987UnpackType unpack_type,6988ExtendType extend_type) {6989VectorFormat vform_half = VectorFormatHalfWidth(vform);6990const int lane_count = LaneCountFromFormat(vform);6991const int src_start_lane = (unpack_type == kLoHalf) ? 
0 : lane_count;69926993switch (extend_type) {6994case kSignedExtend: {6995int64_t result[kZRegMaxSizeInBytes];6996for (int i = 0; i < lane_count; ++i) {6997result[i] = src.Int(vform_half, i + src_start_lane);6998}6999for (int i = 0; i < lane_count; ++i) {7000dst.SetInt(vform, i, result[i]);7001}7002break;7003}7004case kUnsignedExtend: {7005uint64_t result[kZRegMaxSizeInBytes];7006for (int i = 0; i < lane_count; ++i) {7007result[i] = src.Uint(vform_half, i + src_start_lane);7008}7009for (int i = 0; i < lane_count; ++i) {7010dst.SetUint(vform, i, result[i]);7011}7012break;7013}7014default:7015VIXL_UNREACHABLE();7016}7017return dst;7018}70197020LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,7021VectorFormat vform,7022LogicPRegister dst,7023const LogicPRegister& mask,7024const LogicVRegister& src1,7025const LogicVRegister& src2,7026bool is_wide_elements,7027FlagsUpdate flags) {7028for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {7029bool result = false;7030if (mask.IsActive(vform, lane)) {7031int64_t op1 = 0xbadbeef;7032int64_t op2 = 0xbadbeef;7033int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;7034switch (cond) {7035case eq:7036case ge:7037case gt:7038case lt:7039case le:7040case ne:7041op1 = src1.Int(vform, lane);7042op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)7043: src2.Int(vform, lane);7044break;7045case hi:7046case hs:7047case ls:7048case lo:7049op1 = src1.Uint(vform, lane);7050op2 = is_wide_elements ? 
src2.Uint(kFormatVnD, d_lane)7051: src2.Uint(vform, lane);7052break;7053default:7054VIXL_UNREACHABLE();7055}70567057switch (cond) {7058case eq:7059result = (op1 == op2);7060break;7061case ne:7062result = (op1 != op2);7063break;7064case ge:7065result = (op1 >= op2);7066break;7067case gt:7068result = (op1 > op2);7069break;7070case le:7071result = (op1 <= op2);7072break;7073case lt:7074result = (op1 < op2);7075break;7076case hs:7077result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));7078break;7079case hi:7080result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));7081break;7082case ls:7083result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));7084break;7085case lo:7086result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));7087break;7088default:7089VIXL_UNREACHABLE();7090}7091}7092dst.SetActive(vform, lane, result);7093}70947095if (flags == SetFlags) PredTest(vform, mask, dst);70967097return dst;7098}70997100LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,7101VectorFormat vform,7102LogicVRegister dst,7103const LogicVRegister& src1,7104const LogicVRegister& src2,7105bool is_wide_elements) {7106unsigned lane_size = LaneSizeInBitsFromFormat(vform);7107VectorFormat shift_vform = is_wide_elements ? 
kFormatVnD : vform;71087109for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {7110int shift_src_lane = lane;7111if (is_wide_elements) {7112// If the shift amount comes from wide elements, select the D-sized lane7113// which occupies the corresponding lanes of the value to be shifted.7114shift_src_lane = (lane * lane_size) / kDRegSize;7115}7116uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);71177118// Saturate shift_amount to the size of the lane that will be shifted.7119if (shift_amount > lane_size) shift_amount = lane_size;71207121uint64_t value = src1.Uint(vform, lane);7122int64_t result = ShiftOperand(lane_size,7123value,7124shift_op,7125static_cast<unsigned>(shift_amount));7126dst.SetUint(vform, lane, result);7127}71287129return dst;7130}71317132LogicVRegister Simulator::asrd(VectorFormat vform,7133LogicVRegister dst,7134const LogicVRegister& src1,7135int shift) {7136VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=7137LaneSizeInBitsFromFormat(vform)));71387139for (int i = 0; i < LaneCountFromFormat(vform); i++) {7140int64_t value = src1.Int(vform, i);7141if (shift <= 63) {7142if (value < 0) {7143// The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely7144// cast to int64_t, and cannot cause signed overflow in the result.7145value = value + GetUintMask(shift);7146}7147value = ShiftOperand(kDRegSize, value, ASR, shift);7148} else {7149value = 0;7150}7151dst.SetInt(vform, i, value);7152}7153return dst;7154}71557156LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(7157LogicalOp logical_op,7158VectorFormat vform,7159LogicVRegister zd,7160const LogicVRegister& zn,7161const LogicVRegister& zm) {7162VIXL_ASSERT(IsSVEFormat(vform));7163for (int i = 0; i < LaneCountFromFormat(vform); i++) {7164uint64_t op1 = zn.Uint(vform, i);7165uint64_t op2 = zm.Uint(vform, i);7166uint64_t result = 0;7167switch (logical_op) {7168case AND:7169result = op1 & op2;7170break;7171case BIC:7172result = op1 & 
~op2;7173break;7174case EOR:7175result = op1 ^ op2;7176break;7177case ORR:7178result = op1 | op2;7179break;7180default:7181VIXL_UNIMPLEMENTED();7182}7183zd.SetUint(vform, i, result);7184}71857186return zd;7187}71887189LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,7190LogicPRegister pd,7191const LogicPRegister& pn,7192const LogicPRegister& pm) {7193for (int i = 0; i < pn.GetChunkCount(); i++) {7194LogicPRegister::ChunkType op1 = pn.GetChunk(i);7195LogicPRegister::ChunkType op2 = pm.GetChunk(i);7196LogicPRegister::ChunkType result = 0;7197switch (op) {7198case ANDS_p_p_pp_z:7199case AND_p_p_pp_z:7200result = op1 & op2;7201break;7202case BICS_p_p_pp_z:7203case BIC_p_p_pp_z:7204result = op1 & ~op2;7205break;7206case EORS_p_p_pp_z:7207case EOR_p_p_pp_z:7208result = op1 ^ op2;7209break;7210case NANDS_p_p_pp_z:7211case NAND_p_p_pp_z:7212result = ~(op1 & op2);7213break;7214case NORS_p_p_pp_z:7215case NOR_p_p_pp_z:7216result = ~(op1 | op2);7217break;7218case ORNS_p_p_pp_z:7219case ORN_p_p_pp_z:7220result = op1 | ~op2;7221break;7222case ORRS_p_p_pp_z:7223case ORR_p_p_pp_z:7224result = op1 | op2;7225break;7226default:7227VIXL_UNIMPLEMENTED();7228}7229pd.SetChunk(i, result);7230}7231return pd;7232}72337234LogicVRegister Simulator::SVEBitwiseImmHelper(7235SVEBitwiseLogicalWithImm_UnpredicatedOp op,7236VectorFormat vform,7237LogicVRegister zd,7238uint64_t imm) {7239for (int i = 0; i < LaneCountFromFormat(vform); i++) {7240uint64_t op1 = zd.Uint(vform, i);7241uint64_t result = 0;7242switch (op) {7243case AND_z_zi:7244result = op1 & imm;7245break;7246case EOR_z_zi:7247result = op1 ^ imm;7248break;7249case ORR_z_zi:7250result = op1 | imm;7251break;7252default:7253VIXL_UNIMPLEMENTED();7254}7255zd.SetUint(vform, i, result);7256}72577258return zd;7259}72607261void Simulator::SVEStructuredStoreHelper(VectorFormat vform,7262const LogicPRegister& pg,7263unsigned zt_code,7264const LogicSVEAddressVector& addr) {7265VIXL_ASSERT(zt_code < 
kNumberOfZRegisters);72667267int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);7268int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();7269int msize_in_bytes = addr.GetMsizeInBytes();7270int reg_count = addr.GetRegCount();72717272VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);7273VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));72747275unsigned zt_codes[4] = {zt_code,7276(zt_code + 1) % kNumberOfZRegisters,7277(zt_code + 2) % kNumberOfZRegisters,7278(zt_code + 3) % kNumberOfZRegisters};72797280LogicVRegister zt[4] = {7281ReadVRegister(zt_codes[0]),7282ReadVRegister(zt_codes[1]),7283ReadVRegister(zt_codes[2]),7284ReadVRegister(zt_codes[3]),7285};72867287// For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes7288// are ignored, so read the source register using the VectorFormat that7289// corresponds with the storage format, and multiply the index accordingly.7290VectorFormat unpack_vform =7291SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);7292int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;72937294for (int i = 0; i < LaneCountFromFormat(vform); i++) {7295if (!pg.IsActive(vform, i)) continue;72967297for (int r = 0; r < reg_count; r++) {7298uint64_t element_address = addr.GetElementAddress(i, r);7299if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) {7300return;7301}7302}7303}73047305if (ShouldTraceWrites()) {7306PrintRegisterFormat format = GetPrintRegisterFormat(vform);7307if (esize_in_bytes_log2 == msize_in_bytes_log2) {7308// Use an FP format where it's likely that we're accessing FP data.7309format = GetPrintRegisterFormatTryFP(format);7310}7311// Stores don't represent a change to the source register's value, so only7312// print the relevant part of the value.7313format = GetPrintRegPartial(format);73147315PrintZStructAccess(zt_code,7316reg_count,7317pg,7318format,7319msize_in_bytes,7320"->",7321addr);7322}7323}73247325bool Simulator::SVEStructuredLoadHelper(VectorFormat 
vform,7326const LogicPRegister& pg,7327unsigned zt_code,7328const LogicSVEAddressVector& addr,7329bool is_signed) {7330int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);7331int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();7332int msize_in_bytes = addr.GetMsizeInBytes();7333int reg_count = addr.GetRegCount();73347335VIXL_ASSERT(zt_code < kNumberOfZRegisters);7336VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);7337VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));73387339unsigned zt_codes[4] = {zt_code,7340(zt_code + 1) % kNumberOfZRegisters,7341(zt_code + 2) % kNumberOfZRegisters,7342(zt_code + 3) % kNumberOfZRegisters};7343LogicVRegister zt[4] = {7344ReadVRegister(zt_codes[0]),7345ReadVRegister(zt_codes[1]),7346ReadVRegister(zt_codes[2]),7347ReadVRegister(zt_codes[3]),7348};73497350for (int i = 0; i < LaneCountFromFormat(vform); i++) {7351for (int r = 0; r < reg_count; r++) {7352uint64_t element_address = addr.GetElementAddress(i, r);73537354if (!pg.IsActive(vform, i)) {7355zt[r].SetUint(vform, i, 0);7356continue;7357}73587359if (is_signed) {7360if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) {7361return false;7362}7363} else {7364if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) {7365return false;7366}7367}7368}7369}73707371if (ShouldTraceVRegs()) {7372PrintRegisterFormat format = GetPrintRegisterFormat(vform);7373if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {7374// Use an FP format where it's likely that we're accessing FP data.7375format = GetPrintRegisterFormatTryFP(format);7376}7377PrintZStructAccess(zt_code,7378reg_count,7379pg,7380format,7381msize_in_bytes,7382"<-",7383addr);7384}7385return true;7386}73877388LogicPRegister Simulator::brka(LogicPRegister pd,7389const LogicPRegister& pg,7390const LogicPRegister& pn) {7391bool break_ = false;7392for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {7393if (pg.IsActive(kFormatVnB, i)) {7394pd.SetActive(kFormatVnB, i, 
!break_);7395break_ |= pn.IsActive(kFormatVnB, i);7396}7397}73987399return pd;7400}74017402LogicPRegister Simulator::brkb(LogicPRegister pd,7403const LogicPRegister& pg,7404const LogicPRegister& pn) {7405bool break_ = false;7406for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {7407if (pg.IsActive(kFormatVnB, i)) {7408break_ |= pn.IsActive(kFormatVnB, i);7409pd.SetActive(kFormatVnB, i, !break_);7410}7411}74127413return pd;7414}74157416LogicPRegister Simulator::brkn(LogicPRegister pdm,7417const LogicPRegister& pg,7418const LogicPRegister& pn) {7419if (!IsLastActive(kFormatVnB, pg, pn)) {7420pfalse(pdm);7421}7422return pdm;7423}74247425LogicPRegister Simulator::brkpa(LogicPRegister pd,7426const LogicPRegister& pg,7427const LogicPRegister& pn,7428const LogicPRegister& pm) {7429bool last_active = IsLastActive(kFormatVnB, pg, pn);74307431for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {7432bool active = false;7433if (pg.IsActive(kFormatVnB, i)) {7434active = last_active;7435last_active = last_active && !pm.IsActive(kFormatVnB, i);7436}7437pd.SetActive(kFormatVnB, i, active);7438}74397440return pd;7441}74427443LogicPRegister Simulator::brkpb(LogicPRegister pd,7444const LogicPRegister& pg,7445const LogicPRegister& pn,7446const LogicPRegister& pm) {7447bool last_active = IsLastActive(kFormatVnB, pg, pn);74487449for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {7450bool active = false;7451if (pg.IsActive(kFormatVnB, i)) {7452last_active = last_active && !pm.IsActive(kFormatVnB, i);7453active = last_active;7454}7455pd.SetActive(kFormatVnB, i, active);7456}74577458return pd;7459}74607461void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,7462const LogicPRegister& pg,7463unsigned zt_code,7464const LogicSVEAddressVector& addr,7465SVEFaultTolerantLoadType type,7466bool is_signed) {7467int esize_in_bytes = LaneSizeInBytesFromFormat(vform);7468int msize_in_bits = addr.GetMsizeInBits();7469int msize_in_bytes = 
addr.GetMsizeInBytes();74707471VIXL_ASSERT(zt_code < kNumberOfZRegisters);7472VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);7473VIXL_ASSERT(addr.GetRegCount() == 1);74747475LogicVRegister zt = ReadVRegister(zt_code);7476LogicPRegister ffr = ReadFFR();74777478// Non-faulting loads are allowed to fail arbitrarily. To stress user7479// code, fail a random element in roughly one in eight full-vector loads.7480uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));7481int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);74827483for (int i = 0; i < LaneCountFromFormat(vform); i++) {7484uint64_t value = 0;74857486if (pg.IsActive(vform, i)) {7487uint64_t element_address = addr.GetElementAddress(i, 0);74887489if (type == kSVEFirstFaultLoad) {7490// First-faulting loads always load the first active element, regardless7491// of FFR. The result will be discarded if its FFR lane is inactive, but7492// it could still generate a fault.7493VIXL_DEFINE_OR_RETURN(mem_result,7494MemReadUint(msize_in_bytes, element_address));7495value = mem_result;7496// All subsequent elements have non-fault semantics.7497type = kSVENonFaultLoad;74987499} else if (ffr.IsActive(vform, i)) {7500// Simulation of fault-tolerant loads relies on system calls, and is7501// likely to be relatively slow, so we only actually perform the load if7502// its FFR lane is active.75037504bool can_read = (i < fake_fault_at_lane) &&7505CanReadMemory(element_address, msize_in_bytes);7506if (can_read) {7507VIXL_DEFINE_OR_RETURN(mem_result,7508MemReadUint(msize_in_bytes, element_address));7509value = mem_result;7510} else {7511// Propagate the fault to the end of FFR.7512for (int j = i; j < LaneCountFromFormat(vform); j++) {7513ffr.SetActive(vform, j, false);7514}7515}7516}7517}75187519// The architecture permits a few possible results for inactive FFR lanes7520// (including those caused by a fault in this instruction). 
We choose to7521// leave the register value unchanged (like merging predication) because7522// no other input to this instruction can have the same behaviour.7523//7524// Note that this behaviour takes precedence over pg's zeroing predication.75257526if (ffr.IsActive(vform, i)) {7527int msb = msize_in_bits - 1;7528if (is_signed) {7529zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));7530} else {7531zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));7532}7533}7534}75357536if (ShouldTraceVRegs()) {7537PrintRegisterFormat format = GetPrintRegisterFormat(vform);7538if ((esize_in_bytes == msize_in_bytes) && !is_signed) {7539// Use an FP format where it's likely that we're accessing FP data.7540format = GetPrintRegisterFormatTryFP(format);7541}7542// Log accessed lanes that are active in both pg and ffr. PrintZStructAccess7543// expects a single mask, so combine the two predicates.7544SimPRegister mask;7545SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);7546PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);7547}7548}75497550void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,7551VectorFormat vform,7552SVEOffsetModifier mod) {7553bool is_signed = instr->ExtractBit(14) == 0;7554bool is_ff = instr->ExtractBit(13) == 1;7555// Note that these instructions don't use the Dtype encoding.7556int msize_in_bytes_log2 = instr->ExtractBits(24, 23);7557int scale = instr->ExtractBit(21) * msize_in_bytes_log2;7558uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer);7559LogicSVEAddressVector addr(base,7560&ReadVRegister(instr->GetRm()),7561vform,7562mod,7563scale);7564addr.SetMsizeInBytesLog2(msize_in_bytes_log2);7565if (is_ff) {7566SVEFaultTolerantLoadHelper(vform,7567ReadPRegister(instr->GetPgLow8()),7568instr->GetRt(),7569addr,7570kSVEFirstFaultLoad,7571is_signed);7572} else 
{7573SVEStructuredLoadHelper(vform,7574ReadPRegister(instr->GetPgLow8()),7575instr->GetRt(),7576addr,7577is_signed);7578}7579}75807581int Simulator::GetFirstActive(VectorFormat vform,7582const LogicPRegister& pg) const {7583for (int i = 0; i < LaneCountFromFormat(vform); i++) {7584if (pg.IsActive(vform, i)) return i;7585}7586return -1;7587}75887589int Simulator::GetLastActive(VectorFormat vform,7590const LogicPRegister& pg) const {7591for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {7592if (pg.IsActive(vform, i)) return i;7593}7594return -1;7595}75967597int Simulator::CountActiveLanes(VectorFormat vform,7598const LogicPRegister& pg) const {7599int count = 0;7600for (int i = 0; i < LaneCountFromFormat(vform); i++) {7601count += pg.IsActive(vform, i) ? 1 : 0;7602}7603return count;7604}76057606int Simulator::CountActiveAndTrueLanes(VectorFormat vform,7607const LogicPRegister& pg,7608const LogicPRegister& pn) const {7609int count = 0;7610for (int i = 0; i < LaneCountFromFormat(vform); i++) {7611count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;7612}7613return count;7614}76157616int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,7617int pattern) const {7618VIXL_ASSERT(IsSVEFormat(vform));7619int all = LaneCountFromFormat(vform);7620VIXL_ASSERT(all > 0);76217622switch (pattern) {7623case SVE_VL1:7624case SVE_VL2:7625case SVE_VL3:7626case SVE_VL4:7627case SVE_VL5:7628case SVE_VL6:7629case SVE_VL7:7630case SVE_VL8:7631// VL1-VL8 are encoded directly.7632VIXL_STATIC_ASSERT(SVE_VL1 == 1);7633VIXL_STATIC_ASSERT(SVE_VL8 == 8);7634return (pattern <= all) ? pattern : 0;7635case SVE_VL16:7636case SVE_VL32:7637case SVE_VL64:7638case SVE_VL128:7639case SVE_VL256: {7640// VL16-VL256 are encoded as log2(N) + c.7641int min = 16 << (pattern - SVE_VL16);7642return (min <= all) ? 
min : 0;7643}7644// Special cases.7645case SVE_POW2:7646return 1 << HighestSetBitPosition(all);7647case SVE_MUL4:7648return all - (all % 4);7649case SVE_MUL3:7650return all - (all % 3);7651case SVE_ALL:7652return all;7653}7654// Unnamed cases architecturally return 0.7655return 0;7656}76577658LogicPRegister Simulator::match(VectorFormat vform,7659LogicPRegister dst,7660const LogicVRegister& haystack,7661const LogicVRegister& needles,7662bool negate_match) {7663SimVRegister ztemp;7664SimPRegister ptemp;76657666pfalse(dst);7667int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);7668for (int i = 0; i < lanes_per_segment; i++) {7669dup_elements_to_segments(vform, ztemp, needles, i);7670SVEIntCompareVectorsHelper(eq,7671vform,7672ptemp,7673GetPTrue(),7674haystack,7675ztemp,7676false,7677LeaveFlags);7678SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);7679}7680if (negate_match) {7681ptrue(vform, ptemp, SVE_ALL);7682SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);7683}7684return dst;7685}76867687uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {7688if (IsContiguous()) {7689return base_ + (lane * GetRegCount()) * GetMsizeInBytes();7690}76917692VIXL_ASSERT(IsScatterGather());7693VIXL_ASSERT(vector_ != NULL);76947695// For scatter-gather accesses, we need to extract the offset from vector_,7696// and apply modifiers.76977698uint64_t offset = 0;7699switch (vector_form_) {7700case kFormatVnS:7701offset = vector_->GetLane<uint32_t>(lane);7702break;7703case kFormatVnD:7704offset = vector_->GetLane<uint64_t>(lane);7705break;7706default:7707VIXL_UNIMPLEMENTED();7708break;7709}77107711switch (vector_mod_) {7712case SVE_MUL_VL:7713VIXL_UNIMPLEMENTED();7714break;7715case SVE_LSL:7716// We apply the shift below. 
There's nothing to do here.7717break;7718case NO_SVE_OFFSET_MODIFIER:7719VIXL_ASSERT(vector_shift_ == 0);7720break;7721case SVE_UXTW:7722offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);7723break;7724case SVE_SXTW:7725offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);7726break;7727}77287729return base_ + (offset << vector_shift_);7730}77317732LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,7733LogicVRegister dst,7734const LogicVRegister& src) {7735SimVRegister zero;7736zero.Clear();7737return uzp2(vform, dst, src, zero);7738}77397740LogicVRegister Simulator::pack_even_elements(VectorFormat vform,7741LogicVRegister dst,7742const LogicVRegister& src) {7743SimVRegister zero;7744zero.Clear();7745return uzp1(vform, dst, src, zero);7746}77477748LogicVRegister Simulator::adcl(VectorFormat vform,7749LogicVRegister dst,7750const LogicVRegister& src1,7751const LogicVRegister& src2,7752bool top) {7753unsigned reg_size = LaneSizeInBitsFromFormat(vform);7754VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));77557756for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {7757uint64_t left = src1.Uint(vform, i + (top ? 
1 : 0));7758uint64_t right = dst.Uint(vform, i);7759unsigned carry_in = src2.Uint(vform, i + 1) & 1;7760std::pair<uint64_t, uint8_t> val_and_flags =7761AddWithCarry(reg_size, left, right, carry_in);77627763// Set even lanes to the result of the addition.7764dst.SetUint(vform, i, val_and_flags.first);77657766// Set odd lanes to the carry flag from the addition.7767uint64_t carry_out = (val_and_flags.second >> 1) & 1;7768dst.SetUint(vform, i + 1, carry_out);7769}7770return dst;7771}77727773// Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add7774// the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.7775//7776// Matrices of the form:7777//7778// src1 = ( a b c d e f g h ) src2 = ( A B )7779// ( i j k l m n o p ) ( C D )7780// ( E F )7781// ( G H )7782// ( I J )7783// ( K L )7784// ( M N )7785// ( O P )7786//7787// Are stored in the input vector registers as:7788//7789// 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 07790// src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]7791// src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]7792//7793LogicVRegister Simulator::matmul(VectorFormat vform_dst,7794LogicVRegister srcdst,7795const LogicVRegister& src1,7796const LogicVRegister& src2,7797bool src1_signed,7798bool src2_signed) {7799// Two destination forms are supported: Q register containing four S-sized7800// elements (4S) and Z register containing n S-sized elements (VnS).7801VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));7802VectorFormat vform_src = kFormatVnB;7803int b_per_segment = kQRegSize / kBRegSize;7804int s_per_segment = kQRegSize / kSRegSize;7805int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};7806int segment_count = LaneCountFromFormat(vform_dst) / 4;7807for (int seg = 0; seg < segment_count; seg++) {7808for (int i = 0; i < 2; i++) {7809for (int j = 0; j < 2; j++) {7810int dstidx = (2 * i) + j + (seg * s_per_segment);7811int64_t sum = 
srcdst.Int(vform_dst, dstidx);7812for (int k = 0; k < 8; k++) {7813int idx1 = (8 * i) + k + (seg * b_per_segment);7814int idx2 = (8 * j) + k + (seg * b_per_segment);7815int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)7816: src1.Uint(vform_src, idx1);7817int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)7818: src2.Uint(vform_src, idx2);7819sum += e1 * e2;7820}7821result[dstidx] = sum;7822}7823}7824}7825srcdst.SetIntArray(vform_dst, result);7826return srcdst;7827}78287829// Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x27830// result to the matrix in srcdst, and write back to srcdst.7831//7832// Matrices of the form:7833//7834// src1 = ( a b ) src2 = ( A B )7835// ( c d ) ( C D )7836//7837// Are stored in the input vector registers as:7838//7839// 3 2 1 07840// src1 = [ d | c | b | a ]7841// src2 = [ D | B | C | A ]7842//7843template <typename T>7844LogicVRegister Simulator::fmatmul(VectorFormat vform,7845LogicVRegister srcdst,7846const LogicVRegister& src1,7847const LogicVRegister& src2) {7848T result[kZRegMaxSizeInBytes / sizeof(T)];7849int T_per_segment = 4;7850int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));7851for (int seg = 0; seg < segment_count; seg++) {7852int segoff = seg * T_per_segment;7853for (int i = 0; i < 2; i++) {7854for (int j = 0; j < 2; j++) {7855T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),7856src2.Float<T>(2 * j + 0 + segoff));7857T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),7858src2.Float<T>(2 * j + 1 + segoff));7859T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);7860result[2 * i + j + segoff] = FPAdd(sum, prod1);7861}7862}7863}7864for (int i = 0; i < LaneCountFromFormat(vform); i++) {7865// Elements outside a multiple of 4T are set to zero. This happens only7866// for double precision operations, when the VL is a multiple of 128 bits,7867// but not a multiple of 256 bits.7868T value = (i < (T_per_segment * segment_count)) ? 
result[i] : 0;7869srcdst.SetFloat<T>(vform, i, value);7870}7871return srcdst;7872}78737874LogicVRegister Simulator::fmatmul(VectorFormat vform,7875LogicVRegister dst,7876const LogicVRegister& src1,7877const LogicVRegister& src2) {7878if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {7879fmatmul<float>(vform, dst, src1, src2);7880} else {7881VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);7882fmatmul<double>(vform, dst, src1, src2);7883}7884return dst;7885}78867887} // namespace aarch647888} // namespace vixl78897890#endif // VIXL_INCLUDE_SIMULATOR_AARCH64789178927893