// Path: blob/main/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp
// 104187 views
//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This contains code to emit Builtin calls as LLVM code.9//10//===----------------------------------------------------------------------===//1112#include "ABIInfo.h"13#include "CGCUDARuntime.h"14#include "CGCXXABI.h"15#include "CGHLSLRuntime.h"16#include "CGObjCRuntime.h"17#include "CGOpenCLRuntime.h"18#include "CGRecordLayout.h"19#include "CodeGenFunction.h"20#include "CodeGenModule.h"21#include "ConstantEmitter.h"22#include "PatternInit.h"23#include "TargetInfo.h"24#include "clang/AST/ASTContext.h"25#include "clang/AST/Attr.h"26#include "clang/AST/Decl.h"27#include "clang/AST/OSLog.h"28#include "clang/AST/OperationKinds.h"29#include "clang/Basic/TargetBuiltins.h"30#include "clang/Basic/TargetInfo.h"31#include "clang/Basic/TargetOptions.h"32#include "clang/CodeGen/CGFunctionInfo.h"33#include "clang/Frontend/FrontendDiagnostic.h"34#include "llvm/ADT/APFloat.h"35#include "llvm/ADT/APInt.h"36#include "llvm/ADT/FloatingPointMode.h"37#include "llvm/ADT/SmallPtrSet.h"38#include "llvm/ADT/StringExtras.h"39#include "llvm/Analysis/ValueTracking.h"40#include "llvm/IR/DataLayout.h"41#include "llvm/IR/InlineAsm.h"42#include "llvm/IR/Intrinsics.h"43#include "llvm/IR/IntrinsicsAArch64.h"44#include "llvm/IR/IntrinsicsAMDGPU.h"45#include "llvm/IR/IntrinsicsARM.h"46#include "llvm/IR/IntrinsicsBPF.h"47#include "llvm/IR/IntrinsicsDirectX.h"48#include "llvm/IR/IntrinsicsHexagon.h"49#include "llvm/IR/IntrinsicsNVPTX.h"50#include "llvm/IR/IntrinsicsPowerPC.h"51#include "llvm/IR/IntrinsicsR600.h"52#include "llvm/IR/IntrinsicsRISCV.h"53#include "llvm/IR/IntrinsicsS390.h"54#include "llvm/IR/IntrinsicsVE.h"55#include 
"llvm/IR/IntrinsicsWebAssembly.h"56#include "llvm/IR/IntrinsicsX86.h"57#include "llvm/IR/MDBuilder.h"58#include "llvm/IR/MatrixBuilder.h"59#include "llvm/IR/MemoryModelRelaxationAnnotations.h"60#include "llvm/Support/ConvertUTF.h"61#include "llvm/Support/MathExtras.h"62#include "llvm/Support/ScopedPrinter.h"63#include "llvm/TargetParser/AArch64TargetParser.h"64#include "llvm/TargetParser/X86TargetParser.h"65#include <optional>66#include <sstream>6768using namespace clang;69using namespace CodeGen;70using namespace llvm;7172static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,73Align AlignmentInBytes) {74ConstantInt *Byte;75switch (CGF.getLangOpts().getTrivialAutoVarInit()) {76case LangOptions::TrivialAutoVarInitKind::Uninitialized:77// Nothing to initialize.78return;79case LangOptions::TrivialAutoVarInitKind::Zero:80Byte = CGF.Builder.getInt8(0x00);81break;82case LangOptions::TrivialAutoVarInitKind::Pattern: {83llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());84Byte = llvm::dyn_cast<llvm::ConstantInt>(85initializationPatternFor(CGF.CGM, Int8));86break;87}88}89if (CGF.CGM.stopAutoInit())90return;91auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);92I->addAnnotationMetadata("auto-init");93}9495/// getBuiltinLibFunction - Given a builtin id for a function like96/// "__builtin_fabsf", return a Function* for "fabsf".97llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,98unsigned BuiltinID) {99assert(Context.BuiltinInfo.isLibFunction(BuiltinID));100101// Get the name, skip over the __builtin_ prefix (if necessary).102StringRef Name;103GlobalDecl D(FD);104105// TODO: This list should be expanded or refactored after all GCC-compatible106// std libcall builtins are implemented.107static SmallDenseMap<unsigned, StringRef, 64> F128Builtins{108{Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"},109{Builtin::BI__builtin___printf_chk, 
"__printf_chkieee128"},110{Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"},111{Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"},112{Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"},113{Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"},114{Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"},115{Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"},116{Builtin::BI__builtin_fprintf, "__fprintfieee128"},117{Builtin::BI__builtin_printf, "__printfieee128"},118{Builtin::BI__builtin_snprintf, "__snprintfieee128"},119{Builtin::BI__builtin_sprintf, "__sprintfieee128"},120{Builtin::BI__builtin_vfprintf, "__vfprintfieee128"},121{Builtin::BI__builtin_vprintf, "__vprintfieee128"},122{Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"},123{Builtin::BI__builtin_vsprintf, "__vsprintfieee128"},124{Builtin::BI__builtin_fscanf, "__fscanfieee128"},125{Builtin::BI__builtin_scanf, "__scanfieee128"},126{Builtin::BI__builtin_sscanf, "__sscanfieee128"},127{Builtin::BI__builtin_vfscanf, "__vfscanfieee128"},128{Builtin::BI__builtin_vscanf, "__vscanfieee128"},129{Builtin::BI__builtin_vsscanf, "__vsscanfieee128"},130{Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"},131};132133// The AIX library functions frexpl, ldexpl, and modfl are for 128-bit134// IBM 'long double' (i.e. __ibm128). Map to the 'double' versions135// if it is 64-bit 'long double' mode.136static SmallDenseMap<unsigned, StringRef, 4> AIXLongDouble64Builtins{137{Builtin::BI__builtin_frexpl, "frexp"},138{Builtin::BI__builtin_ldexpl, "ldexp"},139{Builtin::BI__builtin_modfl, "modf"},140};141142// If the builtin has been declared explicitly with an assembler label,143// use the mangled name. 
This differs from the plain label on platforms144// that prefix labels.145if (FD->hasAttr<AsmLabelAttr>())146Name = getMangledName(D);147else {148// TODO: This mutation should also be applied to other targets other than149// PPC, after backend supports IEEE 128-bit style libcalls.150if (getTriple().isPPC64() &&151&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() &&152F128Builtins.contains(BuiltinID))153Name = F128Builtins[BuiltinID];154else if (getTriple().isOSAIX() &&155&getTarget().getLongDoubleFormat() ==156&llvm::APFloat::IEEEdouble() &&157AIXLongDouble64Builtins.contains(BuiltinID))158Name = AIXLongDouble64Builtins[BuiltinID];159else160Name = Context.BuiltinInfo.getName(BuiltinID).substr(10);161}162163llvm::FunctionType *Ty =164cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));165166return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);167}168169/// Emit the conversions required to turn the given value into an170/// integer of the given size.171static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,172QualType T, llvm::IntegerType *IntType) {173V = CGF.EmitToMemory(V, T);174175if (V->getType()->isPointerTy())176return CGF.Builder.CreatePtrToInt(V, IntType);177178assert(V->getType() == IntType);179return V;180}181182static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,183QualType T, llvm::Type *ResultType) {184V = CGF.EmitFromMemory(V, T);185186if (ResultType->isPointerTy())187return CGF.Builder.CreateIntToPtr(V, ResultType);188189assert(V->getType() == ResultType);190return V;191}192193static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) {194ASTContext &Ctx = CGF.getContext();195Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));196unsigned Bytes = Ptr.getElementType()->isPointerTy()197? 
Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity()198: Ptr.getElementType()->getScalarSizeInBits() / 8;199unsigned Align = Ptr.getAlignment().getQuantity();200if (Align % Bytes != 0) {201DiagnosticsEngine &Diags = CGF.CGM.getDiags();202Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);203// Force address to be at least naturally-aligned.204return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));205}206return Ptr;207}208209/// Utility to insert an atomic instruction based on Intrinsic::ID210/// and the expression node.211static Value *MakeBinaryAtomicValue(212CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,213AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {214215QualType T = E->getType();216assert(E->getArg(0)->getType()->isPointerType());217assert(CGF.getContext().hasSameUnqualifiedType(T,218E->getArg(0)->getType()->getPointeeType()));219assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));220221Address DestAddr = CheckAtomicAlignment(CGF, E);222223llvm::IntegerType *IntType = llvm::IntegerType::get(224CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));225226llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));227llvm::Type *ValueType = Val->getType();228Val = EmitToInt(CGF, Val, T, IntType);229230llvm::Value *Result =231CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);232return EmitFromInt(CGF, Result, T, ValueType);233}234235static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {236Value *Val = CGF.EmitScalarExpr(E->getArg(0));237Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));238239Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());240LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());241LV.setNontemporal(true);242CGF.EmitStoreOfScalar(Val, LV, false);243return nullptr;244}245246static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {247Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));248249LValue 
LV = CGF.MakeAddrLValue(Addr, E->getType());250LV.setNontemporal(true);251return CGF.EmitLoadOfScalar(LV, E->getExprLoc());252}253254static RValue EmitBinaryAtomic(CodeGenFunction &CGF,255llvm::AtomicRMWInst::BinOp Kind,256const CallExpr *E) {257return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));258}259260/// Utility to insert an atomic instruction based Intrinsic::ID and261/// the expression node, where the return value is the result of the262/// operation.263static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,264llvm::AtomicRMWInst::BinOp Kind,265const CallExpr *E,266Instruction::BinaryOps Op,267bool Invert = false) {268QualType T = E->getType();269assert(E->getArg(0)->getType()->isPointerType());270assert(CGF.getContext().hasSameUnqualifiedType(T,271E->getArg(0)->getType()->getPointeeType()));272assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));273274Address DestAddr = CheckAtomicAlignment(CGF, E);275276llvm::IntegerType *IntType = llvm::IntegerType::get(277CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));278279llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));280llvm::Type *ValueType = Val->getType();281Val = EmitToInt(CGF, Val, T, IntType);282283llvm::Value *Result = CGF.Builder.CreateAtomicRMW(284Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);285Result = CGF.Builder.CreateBinOp(Op, Result, Val);286if (Invert)287Result =288CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,289llvm::ConstantInt::getAllOnesValue(IntType));290Result = EmitFromInt(CGF, Result, T, ValueType);291return RValue::get(Result);292}293294/// Utility to insert an atomic cmpxchg instruction.295///296/// @param CGF The current codegen function.297/// @param E Builtin call expression to convert to cmpxchg.298/// arg0 - address to operate on299/// arg1 - value to compare with300/// arg2 - new value301/// @param ReturnBool Specifies whether to return success flag of302/// cmpxchg result or the old value.303///304/// @returns 
result of cmpxchg, according to ReturnBool305///306/// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics307/// invoke the function EmitAtomicCmpXchgForMSIntrin.308static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,309bool ReturnBool) {310QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();311Address DestAddr = CheckAtomicAlignment(CGF, E);312313llvm::IntegerType *IntType = llvm::IntegerType::get(314CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));315316Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));317llvm::Type *ValueType = Cmp->getType();318Cmp = EmitToInt(CGF, Cmp, T, IntType);319Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);320321Value *Pair = CGF.Builder.CreateAtomicCmpXchg(322DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,323llvm::AtomicOrdering::SequentiallyConsistent);324if (ReturnBool)325// Extract boolean success flag and zext it to int.326return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),327CGF.ConvertType(E->getType()));328else329// Extract old value and emit it using the same type as compare value.330return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,331ValueType);332}333334/// This function should be invoked to emit atomic cmpxchg for Microsoft's335/// _InterlockedCompareExchange* intrinsics which have the following signature:336/// T _InterlockedCompareExchange(T volatile *Destination,337/// T Exchange,338/// T Comparand);339///340/// Whereas the llvm 'cmpxchg' instruction has the following syntax:341/// cmpxchg *Destination, Comparand, Exchange.342/// So we need to swap Comparand and Exchange when invoking343/// CreateAtomicCmpXchg. 
That is the reason we could not use the above utility344/// function MakeAtomicCmpXchgValue since it expects the arguments to be345/// already swapped.346347static348Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,349AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {350assert(E->getArg(0)->getType()->isPointerType());351assert(CGF.getContext().hasSameUnqualifiedType(352E->getType(), E->getArg(0)->getType()->getPointeeType()));353assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),354E->getArg(1)->getType()));355assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),356E->getArg(2)->getType()));357358Address DestAddr = CheckAtomicAlignment(CGF, E);359360auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));361auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));362363// For Release ordering, the failure ordering should be Monotonic.364auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?365AtomicOrdering::Monotonic :366SuccessOrdering;367368// The atomic instruction is marked volatile for consistency with MSVC. This369// blocks the few atomics optimizations that LLVM has. If we want to optimize370// _Interlocked* operations in the future, we will have to remove the volatile371// marker.372auto *Result = CGF.Builder.CreateAtomicCmpXchg(373DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);374Result->setVolatile(true);375return CGF.Builder.CreateExtractValue(Result, 0);376}377378// 64-bit Microsoft platforms support 128 bit cmpxchg operations. 
They are379// prototyped like this:380//381// unsigned char _InterlockedCompareExchange128...(382// __int64 volatile * _Destination,383// __int64 _ExchangeHigh,384// __int64 _ExchangeLow,385// __int64 * _ComparandResult);386//387// Note that Destination is assumed to be at least 16-byte aligned, despite388// being typed int64.389390static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,391const CallExpr *E,392AtomicOrdering SuccessOrdering) {393assert(E->getNumArgs() == 4);394llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));395llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));396llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));397Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));398399assert(DestPtr->getType()->isPointerTy());400assert(!ExchangeHigh->getType()->isPointerTy());401assert(!ExchangeLow->getType()->isPointerTy());402403// For Release ordering, the failure ordering should be Monotonic.404auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release405? AtomicOrdering::Monotonic406: SuccessOrdering;407408// Convert to i128 pointers and values. 
Alignment is also overridden for409// destination pointer.410llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);411Address DestAddr(DestPtr, Int128Ty,412CGF.getContext().toCharUnitsFromBits(128));413ComparandAddr = ComparandAddr.withElementType(Int128Ty);414415// (((i128)hi) << 64) | ((i128)lo)416ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);417ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);418ExchangeHigh =419CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));420llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);421422// Load the comparand for the instruction.423llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);424425auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,426SuccessOrdering, FailureOrdering);427428// The atomic instruction is marked volatile for consistency with MSVC. This429// blocks the few atomics optimizations that LLVM has. If we want to optimize430// _Interlocked* operations in the future, we will have to remove the volatile431// marker.432CXI->setVolatile(true);433434// Store the result as an outparameter.435CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),436ComparandAddr);437438// Get the success boolean and zero extend it to i8.439Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);440return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);441}442443static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,444AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {445assert(E->getArg(0)->getType()->isPointerType());446447auto *IntTy = CGF.ConvertType(E->getType());448Address DestAddr = CheckAtomicAlignment(CGF, E);449auto *Result = CGF.Builder.CreateAtomicRMW(450AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);451return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));452}453454static Value *EmitAtomicDecrementValue(455CodeGenFunction &CGF, 
const CallExpr *E,456AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {457assert(E->getArg(0)->getType()->isPointerType());458459auto *IntTy = CGF.ConvertType(E->getType());460Address DestAddr = CheckAtomicAlignment(CGF, E);461auto *Result = CGF.Builder.CreateAtomicRMW(462AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);463return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));464}465466// Build a plain volatile load.467static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {468Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));469QualType ElTy = E->getArg(0)->getType()->getPointeeType();470CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);471llvm::Type *ITy =472llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);473llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize);474Load->setVolatile(true);475return Load;476}477478// Build a plain volatile store.479static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {480Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));481Value *Value = CGF.EmitScalarExpr(E->getArg(1));482QualType ElTy = E->getArg(0)->getType()->getPointeeType();483CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);484llvm::StoreInst *Store =485CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);486Store->setVolatile(true);487return Store;488}489490// Emit a simple mangled intrinsic that has 1 argument and a return type491// matching the argument type. 
Depending on mode, this may be a constrained492// floating-point intrinsic.493static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,494const CallExpr *E, unsigned IntrinsicID,495unsigned ConstrainedIntrinsicID) {496llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));497498CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);499if (CGF.Builder.getIsFPConstrained()) {500Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());501return CGF.Builder.CreateConstrainedFPCall(F, { Src0 });502} else {503Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());504return CGF.Builder.CreateCall(F, Src0);505}506}507508// Emit an intrinsic that has 2 operands of the same type as its result.509// Depending on mode, this may be a constrained floating-point intrinsic.510static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,511const CallExpr *E, unsigned IntrinsicID,512unsigned ConstrainedIntrinsicID) {513llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));514llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));515516CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);517if (CGF.Builder.getIsFPConstrained()) {518Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());519return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 });520} else {521Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());522return CGF.Builder.CreateCall(F, { Src0, Src1 });523}524}525526// Has second type mangled argument.527static Value *emitBinaryExpMaybeConstrainedFPBuiltin(528CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID,529llvm::Intrinsic::ID ConstrainedIntrinsicID) {530llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));531llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));532533CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);534if (CGF.Builder.getIsFPConstrained()) {535Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,536{Src0->getType(), Src1->getType()});537return 
CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1});538}539540Function *F =541CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()});542return CGF.Builder.CreateCall(F, {Src0, Src1});543}544545// Emit an intrinsic that has 3 operands of the same type as its result.546// Depending on mode, this may be a constrained floating-point intrinsic.547static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,548const CallExpr *E, unsigned IntrinsicID,549unsigned ConstrainedIntrinsicID) {550llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));551llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));552llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));553554CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);555if (CGF.Builder.getIsFPConstrained()) {556Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType());557return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 });558} else {559Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());560return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });561}562}563564// Emit an intrinsic where all operands are of the same type as the result.565// Depending on mode, this may be a constrained floating-point intrinsic.566static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF,567unsigned IntrinsicID,568unsigned ConstrainedIntrinsicID,569llvm::Type *Ty,570ArrayRef<Value *> Args) {571Function *F;572if (CGF.Builder.getIsFPConstrained())573F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty);574else575F = CGF.CGM.getIntrinsic(IntrinsicID, Ty);576577if (CGF.Builder.getIsFPConstrained())578return CGF.Builder.CreateConstrainedFPCall(F, Args);579else580return CGF.Builder.CreateCall(F, Args);581}582583// Emit a simple intrinsic that has N scalar arguments and a return type584// matching the argument type. 
It is assumed that only the first argument is585// overloaded.586template <unsigned N>587Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E,588unsigned IntrinsicID,589llvm::StringRef Name = "") {590static_assert(N, "expect non-empty argument");591SmallVector<Value *, N> Args;592for (unsigned I = 0; I < N; ++I)593Args.push_back(CGF.EmitScalarExpr(E->getArg(I)));594Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType());595return CGF.Builder.CreateCall(F, Args, Name);596}597598// Emit an intrinsic that has 1 float or double operand, and 1 integer.599static Value *emitFPIntBuiltin(CodeGenFunction &CGF,600const CallExpr *E,601unsigned IntrinsicID) {602llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));603llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));604605Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());606return CGF.Builder.CreateCall(F, {Src0, Src1});607}608609// Emit an intrinsic that has overloaded integer result and fp operand.610static Value *611emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E,612unsigned IntrinsicID,613unsigned ConstrainedIntrinsicID) {614llvm::Type *ResultType = CGF.ConvertType(E->getType());615llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));616617if (CGF.Builder.getIsFPConstrained()) {618CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);619Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID,620{ResultType, Src0->getType()});621return CGF.Builder.CreateConstrainedFPCall(F, {Src0});622} else {623Function *F =624CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()});625return CGF.Builder.CreateCall(F, Src0);626}627}628629static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E,630llvm::Intrinsic::ID IntrinsicID) {631llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));632llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));633634QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType();635llvm::Type *IntTy = 
CGF.ConvertType(IntPtrTy);636llvm::Function *F =637CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy});638llvm::Value *Call = CGF.Builder.CreateCall(F, Src0);639640llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1);641LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy);642CGF.EmitStoreOfScalar(Exp, LV);643644return CGF.Builder.CreateExtractValue(Call, 0);645}646647/// EmitFAbs - Emit a call to @llvm.fabs().648static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {649Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());650llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);651Call->setDoesNotAccessMemory();652return Call;653}654655/// Emit the computation of the sign bit for a floating point value. Returns656/// the i1 sign bit value.657static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {658LLVMContext &C = CGF.CGM.getLLVMContext();659660llvm::Type *Ty = V->getType();661int Width = Ty->getPrimitiveSizeInBits();662llvm::Type *IntTy = llvm::IntegerType::get(C, Width);663V = CGF.Builder.CreateBitCast(V, IntTy);664if (Ty->isPPC_FP128Ty()) {665// We want the sign bit of the higher-order double. The bitcast we just666// did works as if the double-double was stored to memory and then667// read as an i128. The "store" will put the higher-order double in the668// lower address in both little- and big-Endian modes, but the "load"669// will treat those bits as a different part of the i128: the low bits in670// little-Endian, the high bits in big-Endian. 
Therefore, on big-Endian671// we need to shift the high bits down to the low before truncating.672Width >>= 1;673if (CGF.getTarget().isBigEndian()) {674Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);675V = CGF.Builder.CreateLShr(V, ShiftCst);676}677// We are truncating value in order to extract the higher-order678// double, which we will be using to extract the sign from.679IntTy = llvm::IntegerType::get(C, Width);680V = CGF.Builder.CreateTrunc(V, IntTy);681}682Value *Zero = llvm::Constant::getNullValue(IntTy);683return CGF.Builder.CreateICmpSLT(V, Zero);684}685686static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,687const CallExpr *E, llvm::Constant *calleeValue) {688CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);689CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));690RValue Call =691CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());692693// Check the supported intrinsic.694if (unsigned BuiltinID = FD->getBuiltinID()) {695auto IsErrnoIntrinsic = [&]() -> unsigned {696switch (BuiltinID) {697case Builtin::BIexpf:698case Builtin::BI__builtin_expf:699case Builtin::BI__builtin_expf128:700return true;701}702// TODO: support more FP math libcalls703return false;704}();705706// Restrict to target with errno, for example, MacOS doesn't set errno.707if (IsErrnoIntrinsic && CGF.CGM.getLangOpts().MathErrno &&708!CGF.Builder.getIsFPConstrained()) {709ASTContext &Context = CGF.getContext();710// Emit "int" TBAA metadata on FP math libcalls.711clang::QualType IntTy = Context.IntTy;712TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy);713Instruction *Inst = cast<llvm::Instruction>(Call.getScalarVal());714CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo);715}716}717return Call;718}719720/// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*721/// depending on IntrinsicID.722///723/// \arg CGF The current codegen function.724/// \arg IntrinsicID The ID for the Intrinsic we wish to 
generate.725/// \arg X The first argument to the llvm.*.with.overflow.*.726/// \arg Y The second argument to the llvm.*.with.overflow.*.727/// \arg Carry The carry returned by the llvm.*.with.overflow.*.728/// \returns The result (i.e. sum/product) returned by the intrinsic.729static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,730const llvm::Intrinsic::ID IntrinsicID,731llvm::Value *X, llvm::Value *Y,732llvm::Value *&Carry) {733// Make sure we have integers of the same width.734assert(X->getType() == Y->getType() &&735"Arguments must be the same type. (Did you forget to make sure both "736"arguments have the same integer width?)");737738Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());739llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});740Carry = CGF.Builder.CreateExtractValue(Tmp, 1);741return CGF.Builder.CreateExtractValue(Tmp, 0);742}743744static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID,745int low, int high) {746Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});747llvm::CallInst *Call = CGF.Builder.CreateCall(F);748llvm::ConstantRange CR(APInt(32, low), APInt(32, high));749Call->addRangeRetAttr(CR);750Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef);751return Call;752}753754namespace {755struct WidthAndSignedness {756unsigned Width;757bool Signed;758};759}760761static WidthAndSignedness762getIntegerWidthAndSignedness(const clang::ASTContext &context,763const clang::QualType Type) {764assert(Type->isIntegerType() && "Given type is not an integer.");765unsigned Width = Type->isBooleanType() ? 1766: Type->isBitIntType() ? 
context.getIntWidth(Type)767: context.getTypeInfo(Type).Width;768bool Signed = Type->isSignedIntegerType();769return {Width, Signed};770}771772// Given one or more integer types, this function produces an integer type that773// encompasses them: any value in one of the given types could be expressed in774// the encompassing type.775static struct WidthAndSignedness776EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {777assert(Types.size() > 0 && "Empty list of types.");778779// If any of the given types is signed, we must return a signed type.780bool Signed = false;781for (const auto &Type : Types) {782Signed |= Type.Signed;783}784785// The encompassing type must have a width greater than or equal to the width786// of the specified types. Additionally, if the encompassing type is signed,787// its width must be strictly greater than the width of any unsigned types788// given.789unsigned Width = 0;790for (const auto &Type : Types) {791unsigned MinWidth = Type.Width + (Signed && !Type.Signed);792if (Width < MinWidth) {793Width = MinWidth;794}795}796797return {Width, Signed};798}799800Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {801Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;802return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}),803ArgValue);804}805806/// Checks if using the result of __builtin_object_size(p, @p From) in place of807/// __builtin_object_size(p, @p To) is correct808static bool areBOSTypesCompatible(int From, int To) {809// Note: Our __builtin_object_size implementation currently treats Type=0 and810// Type=2 identically. 
Encoding this implementation detail here may make811// improving __builtin_object_size difficult in the future, so it's omitted.812return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);813}814815static llvm::Value *816getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {817return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);818}819820llvm::Value *821CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,822llvm::IntegerType *ResType,823llvm::Value *EmittedE,824bool IsDynamic) {825uint64_t ObjectSize;826if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))827return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);828return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);829}830831const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset(832ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl,833uint64_t &Offset) {834const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel =835getLangOpts().getStrictFlexArraysLevel();836uint32_t FieldNo = 0;837838if (RD->isImplicit())839return nullptr;840841for (const FieldDecl *FD : RD->fields()) {842if ((!FAMDecl || FD == FAMDecl) &&843Decl::isFlexibleArrayMemberLike(844Ctx, FD, FD->getType(), StrictFlexArraysLevel,845/*IgnoreTemplateOrMacroSubstitution=*/true)) {846const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);847Offset += Layout.getFieldOffset(FieldNo);848return FD;849}850851QualType Ty = FD->getType();852if (Ty->isRecordType()) {853if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset(854Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) {855const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD);856Offset += Layout.getFieldOffset(FieldNo);857return Field;858}859}860861if (!RD->isUnion())862++FieldNo;863}864865return nullptr;866}867868static unsigned CountCountedByAttrs(const RecordDecl *RD) {869unsigned Num = 0;870871for (const FieldDecl *FD : RD->fields()) 
{872if (FD->getType()->isCountAttributedType())873return ++Num;874875QualType Ty = FD->getType();876if (Ty->isRecordType())877Num += CountCountedByAttrs(Ty->getAsRecordDecl());878}879880return Num;881}882883llvm::Value *884CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,885llvm::IntegerType *ResType) {886// The code generated here calculates the size of a struct with a flexible887// array member that uses the counted_by attribute. There are two instances888// we handle:889//890// struct s {891// unsigned long flags;892// int count;893// int array[] __attribute__((counted_by(count)));894// }895//896// 1) bdos of the flexible array itself:897//898// __builtin_dynamic_object_size(p->array, 1) ==899// p->count * sizeof(*p->array)900//901// 2) bdos of a pointer into the flexible array:902//903// __builtin_dynamic_object_size(&p->array[42], 1) ==904// (p->count - 42) * sizeof(*p->array)905//906// 2) bdos of the whole struct, including the flexible array:907//908// __builtin_dynamic_object_size(p, 1) ==909// max(sizeof(struct s),910// offsetof(struct s, array) + p->count * sizeof(*p->array))911//912ASTContext &Ctx = getContext();913const Expr *Base = E->IgnoreParenImpCasts();914const Expr *Idx = nullptr;915916if (const auto *UO = dyn_cast<UnaryOperator>(Base);917UO && UO->getOpcode() == UO_AddrOf) {918Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts();919if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(SubExpr)) {920Base = ASE->getBase()->IgnoreParenImpCasts();921Idx = ASE->getIdx()->IgnoreParenImpCasts();922923if (const auto *IL = dyn_cast<IntegerLiteral>(Idx)) {924int64_t Val = IL->getValue().getSExtValue();925if (Val < 0)926return getDefaultBuiltinObjectSizeResult(Type, ResType);927928if (Val == 0)929// The index is 0, so we don't need to take it into account.930Idx = nullptr;931}932} else {933// Potential pointer to another element in the struct.934Base = SubExpr;935}936}937938// Get the flexible array member Decl.939const RecordDecl 
*OuterRD = nullptr;940const FieldDecl *FAMDecl = nullptr;941if (const auto *ME = dyn_cast<MemberExpr>(Base)) {942// Check if \p Base is referencing the FAM itself.943const ValueDecl *VD = ME->getMemberDecl();944OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext();945FAMDecl = dyn_cast<FieldDecl>(VD);946if (!FAMDecl)947return nullptr;948} else if (const auto *DRE = dyn_cast<DeclRefExpr>(Base)) {949// Check if we're pointing to the whole struct.950QualType Ty = DRE->getDecl()->getType();951if (Ty->isPointerType())952Ty = Ty->getPointeeType();953OuterRD = Ty->getAsRecordDecl();954955// If we have a situation like this:956//957// struct union_of_fams {958// int flags;959// union {960// signed char normal_field;961// struct {962// int count1;963// int arr1[] __counted_by(count1);964// };965// struct {966// signed char count2;967// int arr2[] __counted_by(count2);968// };969// };970// };971//972// We don't know which 'count' to use in this scenario:973//974// size_t get_size(struct union_of_fams *p) {975// return __builtin_dynamic_object_size(p, 1);976// }977//978// Instead of calculating a wrong number, we give up.979if (OuterRD && CountCountedByAttrs(OuterRD) > 1)980return nullptr;981}982983if (!OuterRD)984return nullptr;985986// We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to987// get its offset.988uint64_t Offset = 0;989FAMDecl =990FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset);991Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity();992993if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType())994// No flexible array member found or it doesn't have the "counted_by"995// attribute.996return nullptr;997998const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);999if (!CountedByFD)1000// Can't find the field referenced by the "counted_by" attribute.1001return nullptr;10021003if (isa<DeclRefExpr>(Base))1004// The whole struct is specificed in the __bdos. 
The calculation of the1005// whole size of the structure can be done in two ways:1006//1007// 1) sizeof(struct S) + count * sizeof(typeof(fam))1008// 2) offsetof(struct S, fam) + count * sizeof(typeof(fam))1009//1010// The first will add additional padding after the end of the array,1011// allocation while the second method is more precise, but not quite1012// expected from programmers. See1013// https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a1014// discussion of the topic.1015//1016// GCC isn't (currently) able to calculate __bdos on a pointer to the whole1017// structure. Therefore, because of the above issue, we'll choose to match1018// what GCC does for consistency's sake.1019return nullptr;10201021// Build a load of the counted_by field.1022bool IsSigned = CountedByFD->getType()->isSignedIntegerType();1023Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD);1024if (!CountedByInst)1025return getDefaultBuiltinObjectSizeResult(Type, ResType);10261027CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned);10281029// Build a load of the index and subtract it from the count.1030Value *IdxInst = nullptr;1031if (Idx) {1032if (Idx->HasSideEffects(getContext()))1033// We can't have side-effects.1034return getDefaultBuiltinObjectSizeResult(Type, ResType);10351036bool IdxSigned = Idx->getType()->isSignedIntegerType();1037IdxInst = EmitAnyExprToTemp(Idx).getScalarVal();1038IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned);10391040// We go ahead with the calculation here. 
If the index turns out to be1041// negative, we'll catch it at the end.1042CountedByInst =1043Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned);1044}10451046// Calculate how large the flexible array member is in bytes.1047const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType());1048CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType());1049llvm::Constant *ElemSize =1050llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned);1051Value *Res =1052Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned);1053Res = Builder.CreateIntCast(Res, ResType, IsSigned);10541055// A negative \p IdxInst or \p CountedByInst means that the index lands1056// outside of the flexible array member. If that's the case, we want to1057// return 0.1058Value *Cmp = Builder.CreateIsNotNeg(CountedByInst);1059if (IdxInst)1060Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp);10611062return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned));1063}10641065/// Returns a Value corresponding to the size of the given expression.1066/// This Value may be either of the following:1067/// - A llvm::Argument (if E is a param with the pass_object_size attribute on1068/// it)1069/// - A call to the @llvm.objectsize intrinsic1070///1071/// EmittedE is the result of emitting `E` as a scalar expr. 
If it's non-null1072/// and we wouldn't otherwise try to reference a pass_object_size parameter,1073/// we'll call @llvm.objectsize on EmittedE, rather than emitting E.1074llvm::Value *1075CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,1076llvm::IntegerType *ResType,1077llvm::Value *EmittedE, bool IsDynamic) {1078// We need to reference an argument if the pointer is a parameter with the1079// pass_object_size attribute.1080if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {1081auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());1082auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();1083if (Param != nullptr && PS != nullptr &&1084areBOSTypesCompatible(PS->getType(), Type)) {1085auto Iter = SizeArguments.find(Param);1086assert(Iter != SizeArguments.end());10871088const ImplicitParamDecl *D = Iter->second;1089auto DIter = LocalDeclMap.find(D);1090assert(DIter != LocalDeclMap.end());10911092return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,1093getContext().getSizeType(), E->getBeginLoc());1094}1095}10961097if (IsDynamic) {1098// Emit special code for a flexible array member with the "counted_by"1099// attribute.1100if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType))1101return V;1102}11031104// LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't1105// evaluate E for side-effects. In either case, we shouldn't lower to1106// @llvm.objectsize.1107if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))1108return getDefaultBuiltinObjectSizeResult(Type, ResType);11091110Value *Ptr = EmittedE ? 
EmittedE : EmitScalarExpr(E);1111assert(Ptr->getType()->isPointerTy() &&1112"Non-pointer passed to __builtin_object_size?");11131114Function *F =1115CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});11161117// LLVM only supports 0 and 2, make sure that we pass along that as a boolean.1118Value *Min = Builder.getInt1((Type & 2) != 0);1119// For GCC compatibility, __builtin_object_size treat NULL as unknown size.1120Value *NullIsUnknown = Builder.getTrue();1121Value *Dynamic = Builder.getInt1(IsDynamic);1122return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});1123}11241125namespace {1126/// A struct to generically describe a bit test intrinsic.1127struct BitTest {1128enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };1129enum InterlockingKind : uint8_t {1130Unlocked,1131Sequential,1132Acquire,1133Release,1134NoFence1135};11361137ActionKind Action;1138InterlockingKind Interlocking;1139bool Is64Bit;11401141static BitTest decodeBitTestBuiltin(unsigned BuiltinID);1142};11431144} // namespace11451146BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {1147switch (BuiltinID) {1148// Main portable variants.1149case Builtin::BI_bittest:1150return {TestOnly, Unlocked, false};1151case Builtin::BI_bittestandcomplement:1152return {Complement, Unlocked, false};1153case Builtin::BI_bittestandreset:1154return {Reset, Unlocked, false};1155case Builtin::BI_bittestandset:1156return {Set, Unlocked, false};1157case Builtin::BI_interlockedbittestandreset:1158return {Reset, Sequential, false};1159case Builtin::BI_interlockedbittestandset:1160return {Set, Sequential, false};11611162// X86-specific 64-bit variants.1163case Builtin::BI_bittest64:1164return {TestOnly, Unlocked, true};1165case Builtin::BI_bittestandcomplement64:1166return {Complement, Unlocked, true};1167case Builtin::BI_bittestandreset64:1168return {Reset, Unlocked, true};1169case Builtin::BI_bittestandset64:1170return {Set, Unlocked, true};1171case 
Builtin::BI_interlockedbittestandreset64:1172return {Reset, Sequential, true};1173case Builtin::BI_interlockedbittestandset64:1174return {Set, Sequential, true};11751176// ARM/AArch64-specific ordering variants.1177case Builtin::BI_interlockedbittestandset_acq:1178return {Set, Acquire, false};1179case Builtin::BI_interlockedbittestandset_rel:1180return {Set, Release, false};1181case Builtin::BI_interlockedbittestandset_nf:1182return {Set, NoFence, false};1183case Builtin::BI_interlockedbittestandreset_acq:1184return {Reset, Acquire, false};1185case Builtin::BI_interlockedbittestandreset_rel:1186return {Reset, Release, false};1187case Builtin::BI_interlockedbittestandreset_nf:1188return {Reset, NoFence, false};1189}1190llvm_unreachable("expected only bittest intrinsics");1191}11921193static char bitActionToX86BTCode(BitTest::ActionKind A) {1194switch (A) {1195case BitTest::TestOnly: return '\0';1196case BitTest::Complement: return 'c';1197case BitTest::Reset: return 'r';1198case BitTest::Set: return 's';1199}1200llvm_unreachable("invalid action");1201}12021203static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,1204BitTest BT,1205const CallExpr *E, Value *BitBase,1206Value *BitPos) {1207char Action = bitActionToX86BTCode(BT.Action);1208char SizeSuffix = BT.Is64Bit ? 'q' : 'l';12091210// Build the assembly.1211SmallString<64> Asm;1212raw_svector_ostream AsmOS(Asm);1213if (BT.Interlocking != BitTest::Unlocked)1214AsmOS << "lock ";1215AsmOS << "bt";1216if (Action)1217AsmOS << Action;1218AsmOS << SizeSuffix << " $2, ($1)";12191220// Build the constraints. 
FIXME: We should support immediates when possible.1221std::string Constraints = "={@ccc},r,r,~{cc},~{memory}";1222std::string_view MachineClobbers = CGF.getTarget().getClobbers();1223if (!MachineClobbers.empty()) {1224Constraints += ',';1225Constraints += MachineClobbers;1226}1227llvm::IntegerType *IntType = llvm::IntegerType::get(1228CGF.getLLVMContext(),1229CGF.getContext().getTypeSize(E->getArg(1)->getType()));1230llvm::FunctionType *FTy =1231llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false);12321233llvm::InlineAsm *IA =1234llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);1235return CGF.Builder.CreateCall(IA, {BitBase, BitPos});1236}12371238static llvm::AtomicOrdering1239getBitTestAtomicOrdering(BitTest::InterlockingKind I) {1240switch (I) {1241case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;1242case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;1243case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;1244case BitTest::Release: return llvm::AtomicOrdering::Release;1245case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;1246}1247llvm_unreachable("invalid interlocking");1248}12491250/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of1251/// bits and a bit position and read and optionally modify the bit at that1252/// position. The position index can be arbitrarily large, i.e. it can be larger1253/// than 31 or 63, so we need an indexed load in the general case.1254static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,1255unsigned BuiltinID,1256const CallExpr *E) {1257Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));1258Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));12591260BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);12611262// X86 has special BT, BTC, BTR, and BTS instructions that handle the array1263// indexing operation internally. 
Use them if possible.1264if (CGF.getTarget().getTriple().isX86())1265return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);12661267// Otherwise, use generic code to load one byte and test the bit. Use all but1268// the bottom three bits as the array index, and the bottom three bits to form1269// a mask.1270// Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;1271Value *ByteIndex = CGF.Builder.CreateAShr(1272BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");1273Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);1274Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,1275ByteIndex, "bittest.byteaddr"),1276CGF.Int8Ty, CharUnits::One());1277Value *PosLow =1278CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),1279llvm::ConstantInt::get(CGF.Int8Ty, 0x7));12801281// The updating instructions will need a mask.1282Value *Mask = nullptr;1283if (BT.Action != BitTest::TestOnly) {1284Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,1285"bittest.mask");1286}12871288// Check the action and ordering of the interlocked intrinsics.1289llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);12901291Value *OldByte = nullptr;1292if (Ordering != llvm::AtomicOrdering::NotAtomic) {1293// Emit a combined atomicrmw load/store operation for the interlocked1294// intrinsics.1295llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;1296if (BT.Action == BitTest::Reset) {1297Mask = CGF.Builder.CreateNot(Mask);1298RMWOp = llvm::AtomicRMWInst::And;1299}1300OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);1301} else {1302// Emit a plain load for the non-interlocked intrinsics.1303OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");1304Value *NewByte = nullptr;1305switch (BT.Action) {1306case BitTest::TestOnly:1307// Don't store anything.1308break;1309case BitTest::Complement:1310NewByte = CGF.Builder.CreateXor(OldByte, 
Mask);1311break;1312case BitTest::Reset:1313NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));1314break;1315case BitTest::Set:1316NewByte = CGF.Builder.CreateOr(OldByte, Mask);1317break;1318}1319if (NewByte)1320CGF.Builder.CreateStore(NewByte, ByteAddr);1321}13221323// However we loaded the old byte, either by plain load or atomicrmw, shift1324// the bit into the low position and mask it to 0 or 1.1325Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");1326return CGF.Builder.CreateAnd(1327ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");1328}13291330static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF,1331unsigned BuiltinID,1332const CallExpr *E) {1333Value *Addr = CGF.EmitScalarExpr(E->getArg(0));13341335SmallString<64> Asm;1336raw_svector_ostream AsmOS(Asm);1337llvm::IntegerType *RetType = CGF.Int32Ty;13381339switch (BuiltinID) {1340case clang::PPC::BI__builtin_ppc_ldarx:1341AsmOS << "ldarx ";1342RetType = CGF.Int64Ty;1343break;1344case clang::PPC::BI__builtin_ppc_lwarx:1345AsmOS << "lwarx ";1346RetType = CGF.Int32Ty;1347break;1348case clang::PPC::BI__builtin_ppc_lharx:1349AsmOS << "lharx ";1350RetType = CGF.Int16Ty;1351break;1352case clang::PPC::BI__builtin_ppc_lbarx:1353AsmOS << "lbarx ";1354RetType = CGF.Int8Ty;1355break;1356default:1357llvm_unreachable("Expected only PowerPC load reserve intrinsics");1358}13591360AsmOS << "$0, ${1:y}";13611362std::string Constraints = "=r,*Z,~{memory}";1363std::string_view MachineClobbers = CGF.getTarget().getClobbers();1364if (!MachineClobbers.empty()) {1365Constraints += ',';1366Constraints += MachineClobbers;1367}13681369llvm::Type *PtrType = CGF.UnqualPtrTy;1370llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false);13711372llvm::InlineAsm *IA =1373llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);1374llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr});1375CI->addParamAttr(13760, 
Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType));1377return CI;1378}13791380namespace {1381enum class MSVCSetJmpKind {1382_setjmpex,1383_setjmp3,1384_setjmp1385};1386}13871388/// MSVC handles setjmp a bit differently on different platforms. On every1389/// architecture except 32-bit x86, the frame address is passed. On x86, extra1390/// parameters can be passed as variadic arguments, but we always pass none.1391static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,1392const CallExpr *E) {1393llvm::Value *Arg1 = nullptr;1394llvm::Type *Arg1Ty = nullptr;1395StringRef Name;1396bool IsVarArg = false;1397if (SJKind == MSVCSetJmpKind::_setjmp3) {1398Name = "_setjmp3";1399Arg1Ty = CGF.Int32Ty;1400Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);1401IsVarArg = true;1402} else {1403Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";1404Arg1Ty = CGF.Int8PtrTy;1405if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {1406Arg1 = CGF.Builder.CreateCall(1407CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy));1408} else1409Arg1 = CGF.Builder.CreateCall(1410CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy),1411llvm::ConstantInt::get(CGF.Int32Ty, 0));1412}14131414// Mark the call site and declaration with ReturnsTwice.1415llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};1416llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(1417CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,1418llvm::Attribute::ReturnsTwice);1419llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(1420llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,1421ReturnsTwiceAttr, /*Local=*/true);14221423llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(1424CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);1425llvm::Value *Args[] = {Buf, Arg1};1426llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);1427CB->setAttributes(ReturnsTwiceAttr);1428return 
RValue::get(CB);1429}14301431// Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,1432// we handle them here.1433enum class CodeGenFunction::MSVCIntrin {1434_BitScanForward,1435_BitScanReverse,1436_InterlockedAnd,1437_InterlockedDecrement,1438_InterlockedExchange,1439_InterlockedExchangeAdd,1440_InterlockedExchangeSub,1441_InterlockedIncrement,1442_InterlockedOr,1443_InterlockedXor,1444_InterlockedExchangeAdd_acq,1445_InterlockedExchangeAdd_rel,1446_InterlockedExchangeAdd_nf,1447_InterlockedExchange_acq,1448_InterlockedExchange_rel,1449_InterlockedExchange_nf,1450_InterlockedCompareExchange_acq,1451_InterlockedCompareExchange_rel,1452_InterlockedCompareExchange_nf,1453_InterlockedCompareExchange128,1454_InterlockedCompareExchange128_acq,1455_InterlockedCompareExchange128_rel,1456_InterlockedCompareExchange128_nf,1457_InterlockedOr_acq,1458_InterlockedOr_rel,1459_InterlockedOr_nf,1460_InterlockedXor_acq,1461_InterlockedXor_rel,1462_InterlockedXor_nf,1463_InterlockedAnd_acq,1464_InterlockedAnd_rel,1465_InterlockedAnd_nf,1466_InterlockedIncrement_acq,1467_InterlockedIncrement_rel,1468_InterlockedIncrement_nf,1469_InterlockedDecrement_acq,1470_InterlockedDecrement_rel,1471_InterlockedDecrement_nf,1472__fastfail,1473};14741475static std::optional<CodeGenFunction::MSVCIntrin>1476translateArmToMsvcIntrin(unsigned BuiltinID) {1477using MSVCIntrin = CodeGenFunction::MSVCIntrin;1478switch (BuiltinID) {1479default:1480return std::nullopt;1481case clang::ARM::BI_BitScanForward:1482case clang::ARM::BI_BitScanForward64:1483return MSVCIntrin::_BitScanForward;1484case clang::ARM::BI_BitScanReverse:1485case clang::ARM::BI_BitScanReverse64:1486return MSVCIntrin::_BitScanReverse;1487case clang::ARM::BI_InterlockedAnd64:1488return MSVCIntrin::_InterlockedAnd;1489case clang::ARM::BI_InterlockedExchange64:1490return MSVCIntrin::_InterlockedExchange;1491case clang::ARM::BI_InterlockedExchangeAdd64:1492return MSVCIntrin::_InterlockedExchangeAdd;1493case 
clang::ARM::BI_InterlockedExchangeSub64:1494return MSVCIntrin::_InterlockedExchangeSub;1495case clang::ARM::BI_InterlockedOr64:1496return MSVCIntrin::_InterlockedOr;1497case clang::ARM::BI_InterlockedXor64:1498return MSVCIntrin::_InterlockedXor;1499case clang::ARM::BI_InterlockedDecrement64:1500return MSVCIntrin::_InterlockedDecrement;1501case clang::ARM::BI_InterlockedIncrement64:1502return MSVCIntrin::_InterlockedIncrement;1503case clang::ARM::BI_InterlockedExchangeAdd8_acq:1504case clang::ARM::BI_InterlockedExchangeAdd16_acq:1505case clang::ARM::BI_InterlockedExchangeAdd_acq:1506case clang::ARM::BI_InterlockedExchangeAdd64_acq:1507return MSVCIntrin::_InterlockedExchangeAdd_acq;1508case clang::ARM::BI_InterlockedExchangeAdd8_rel:1509case clang::ARM::BI_InterlockedExchangeAdd16_rel:1510case clang::ARM::BI_InterlockedExchangeAdd_rel:1511case clang::ARM::BI_InterlockedExchangeAdd64_rel:1512return MSVCIntrin::_InterlockedExchangeAdd_rel;1513case clang::ARM::BI_InterlockedExchangeAdd8_nf:1514case clang::ARM::BI_InterlockedExchangeAdd16_nf:1515case clang::ARM::BI_InterlockedExchangeAdd_nf:1516case clang::ARM::BI_InterlockedExchangeAdd64_nf:1517return MSVCIntrin::_InterlockedExchangeAdd_nf;1518case clang::ARM::BI_InterlockedExchange8_acq:1519case clang::ARM::BI_InterlockedExchange16_acq:1520case clang::ARM::BI_InterlockedExchange_acq:1521case clang::ARM::BI_InterlockedExchange64_acq:1522return MSVCIntrin::_InterlockedExchange_acq;1523case clang::ARM::BI_InterlockedExchange8_rel:1524case clang::ARM::BI_InterlockedExchange16_rel:1525case clang::ARM::BI_InterlockedExchange_rel:1526case clang::ARM::BI_InterlockedExchange64_rel:1527return MSVCIntrin::_InterlockedExchange_rel;1528case clang::ARM::BI_InterlockedExchange8_nf:1529case clang::ARM::BI_InterlockedExchange16_nf:1530case clang::ARM::BI_InterlockedExchange_nf:1531case clang::ARM::BI_InterlockedExchange64_nf:1532return MSVCIntrin::_InterlockedExchange_nf;1533case clang::ARM::BI_InterlockedCompareExchange8_acq:1534case 
clang::ARM::BI_InterlockedCompareExchange16_acq:1535case clang::ARM::BI_InterlockedCompareExchange_acq:1536case clang::ARM::BI_InterlockedCompareExchange64_acq:1537return MSVCIntrin::_InterlockedCompareExchange_acq;1538case clang::ARM::BI_InterlockedCompareExchange8_rel:1539case clang::ARM::BI_InterlockedCompareExchange16_rel:1540case clang::ARM::BI_InterlockedCompareExchange_rel:1541case clang::ARM::BI_InterlockedCompareExchange64_rel:1542return MSVCIntrin::_InterlockedCompareExchange_rel;1543case clang::ARM::BI_InterlockedCompareExchange8_nf:1544case clang::ARM::BI_InterlockedCompareExchange16_nf:1545case clang::ARM::BI_InterlockedCompareExchange_nf:1546case clang::ARM::BI_InterlockedCompareExchange64_nf:1547return MSVCIntrin::_InterlockedCompareExchange_nf;1548case clang::ARM::BI_InterlockedOr8_acq:1549case clang::ARM::BI_InterlockedOr16_acq:1550case clang::ARM::BI_InterlockedOr_acq:1551case clang::ARM::BI_InterlockedOr64_acq:1552return MSVCIntrin::_InterlockedOr_acq;1553case clang::ARM::BI_InterlockedOr8_rel:1554case clang::ARM::BI_InterlockedOr16_rel:1555case clang::ARM::BI_InterlockedOr_rel:1556case clang::ARM::BI_InterlockedOr64_rel:1557return MSVCIntrin::_InterlockedOr_rel;1558case clang::ARM::BI_InterlockedOr8_nf:1559case clang::ARM::BI_InterlockedOr16_nf:1560case clang::ARM::BI_InterlockedOr_nf:1561case clang::ARM::BI_InterlockedOr64_nf:1562return MSVCIntrin::_InterlockedOr_nf;1563case clang::ARM::BI_InterlockedXor8_acq:1564case clang::ARM::BI_InterlockedXor16_acq:1565case clang::ARM::BI_InterlockedXor_acq:1566case clang::ARM::BI_InterlockedXor64_acq:1567return MSVCIntrin::_InterlockedXor_acq;1568case clang::ARM::BI_InterlockedXor8_rel:1569case clang::ARM::BI_InterlockedXor16_rel:1570case clang::ARM::BI_InterlockedXor_rel:1571case clang::ARM::BI_InterlockedXor64_rel:1572return MSVCIntrin::_InterlockedXor_rel;1573case clang::ARM::BI_InterlockedXor8_nf:1574case clang::ARM::BI_InterlockedXor16_nf:1575case clang::ARM::BI_InterlockedXor_nf:1576case 
clang::ARM::BI_InterlockedXor64_nf:1577return MSVCIntrin::_InterlockedXor_nf;1578case clang::ARM::BI_InterlockedAnd8_acq:1579case clang::ARM::BI_InterlockedAnd16_acq:1580case clang::ARM::BI_InterlockedAnd_acq:1581case clang::ARM::BI_InterlockedAnd64_acq:1582return MSVCIntrin::_InterlockedAnd_acq;1583case clang::ARM::BI_InterlockedAnd8_rel:1584case clang::ARM::BI_InterlockedAnd16_rel:1585case clang::ARM::BI_InterlockedAnd_rel:1586case clang::ARM::BI_InterlockedAnd64_rel:1587return MSVCIntrin::_InterlockedAnd_rel;1588case clang::ARM::BI_InterlockedAnd8_nf:1589case clang::ARM::BI_InterlockedAnd16_nf:1590case clang::ARM::BI_InterlockedAnd_nf:1591case clang::ARM::BI_InterlockedAnd64_nf:1592return MSVCIntrin::_InterlockedAnd_nf;1593case clang::ARM::BI_InterlockedIncrement16_acq:1594case clang::ARM::BI_InterlockedIncrement_acq:1595case clang::ARM::BI_InterlockedIncrement64_acq:1596return MSVCIntrin::_InterlockedIncrement_acq;1597case clang::ARM::BI_InterlockedIncrement16_rel:1598case clang::ARM::BI_InterlockedIncrement_rel:1599case clang::ARM::BI_InterlockedIncrement64_rel:1600return MSVCIntrin::_InterlockedIncrement_rel;1601case clang::ARM::BI_InterlockedIncrement16_nf:1602case clang::ARM::BI_InterlockedIncrement_nf:1603case clang::ARM::BI_InterlockedIncrement64_nf:1604return MSVCIntrin::_InterlockedIncrement_nf;1605case clang::ARM::BI_InterlockedDecrement16_acq:1606case clang::ARM::BI_InterlockedDecrement_acq:1607case clang::ARM::BI_InterlockedDecrement64_acq:1608return MSVCIntrin::_InterlockedDecrement_acq;1609case clang::ARM::BI_InterlockedDecrement16_rel:1610case clang::ARM::BI_InterlockedDecrement_rel:1611case clang::ARM::BI_InterlockedDecrement64_rel:1612return MSVCIntrin::_InterlockedDecrement_rel;1613case clang::ARM::BI_InterlockedDecrement16_nf:1614case clang::ARM::BI_InterlockedDecrement_nf:1615case clang::ARM::BI_InterlockedDecrement64_nf:1616return MSVCIntrin::_InterlockedDecrement_nf;1617}1618llvm_unreachable("must return from switch");1619}16201621static 
std::optional<CodeGenFunction::MSVCIntrin>1622translateAarch64ToMsvcIntrin(unsigned BuiltinID) {1623using MSVCIntrin = CodeGenFunction::MSVCIntrin;1624switch (BuiltinID) {1625default:1626return std::nullopt;1627case clang::AArch64::BI_BitScanForward:1628case clang::AArch64::BI_BitScanForward64:1629return MSVCIntrin::_BitScanForward;1630case clang::AArch64::BI_BitScanReverse:1631case clang::AArch64::BI_BitScanReverse64:1632return MSVCIntrin::_BitScanReverse;1633case clang::AArch64::BI_InterlockedAnd64:1634return MSVCIntrin::_InterlockedAnd;1635case clang::AArch64::BI_InterlockedExchange64:1636return MSVCIntrin::_InterlockedExchange;1637case clang::AArch64::BI_InterlockedExchangeAdd64:1638return MSVCIntrin::_InterlockedExchangeAdd;1639case clang::AArch64::BI_InterlockedExchangeSub64:1640return MSVCIntrin::_InterlockedExchangeSub;1641case clang::AArch64::BI_InterlockedOr64:1642return MSVCIntrin::_InterlockedOr;1643case clang::AArch64::BI_InterlockedXor64:1644return MSVCIntrin::_InterlockedXor;1645case clang::AArch64::BI_InterlockedDecrement64:1646return MSVCIntrin::_InterlockedDecrement;1647case clang::AArch64::BI_InterlockedIncrement64:1648return MSVCIntrin::_InterlockedIncrement;1649case clang::AArch64::BI_InterlockedExchangeAdd8_acq:1650case clang::AArch64::BI_InterlockedExchangeAdd16_acq:1651case clang::AArch64::BI_InterlockedExchangeAdd_acq:1652case clang::AArch64::BI_InterlockedExchangeAdd64_acq:1653return MSVCIntrin::_InterlockedExchangeAdd_acq;1654case clang::AArch64::BI_InterlockedExchangeAdd8_rel:1655case clang::AArch64::BI_InterlockedExchangeAdd16_rel:1656case clang::AArch64::BI_InterlockedExchangeAdd_rel:1657case clang::AArch64::BI_InterlockedExchangeAdd64_rel:1658return MSVCIntrin::_InterlockedExchangeAdd_rel;1659case clang::AArch64::BI_InterlockedExchangeAdd8_nf:1660case clang::AArch64::BI_InterlockedExchangeAdd16_nf:1661case clang::AArch64::BI_InterlockedExchangeAdd_nf:1662case clang::AArch64::BI_InterlockedExchangeAdd64_nf:1663return 
MSVCIntrin::_InterlockedExchangeAdd_nf;1664case clang::AArch64::BI_InterlockedExchange8_acq:1665case clang::AArch64::BI_InterlockedExchange16_acq:1666case clang::AArch64::BI_InterlockedExchange_acq:1667case clang::AArch64::BI_InterlockedExchange64_acq:1668return MSVCIntrin::_InterlockedExchange_acq;1669case clang::AArch64::BI_InterlockedExchange8_rel:1670case clang::AArch64::BI_InterlockedExchange16_rel:1671case clang::AArch64::BI_InterlockedExchange_rel:1672case clang::AArch64::BI_InterlockedExchange64_rel:1673return MSVCIntrin::_InterlockedExchange_rel;1674case clang::AArch64::BI_InterlockedExchange8_nf:1675case clang::AArch64::BI_InterlockedExchange16_nf:1676case clang::AArch64::BI_InterlockedExchange_nf:1677case clang::AArch64::BI_InterlockedExchange64_nf:1678return MSVCIntrin::_InterlockedExchange_nf;1679case clang::AArch64::BI_InterlockedCompareExchange8_acq:1680case clang::AArch64::BI_InterlockedCompareExchange16_acq:1681case clang::AArch64::BI_InterlockedCompareExchange_acq:1682case clang::AArch64::BI_InterlockedCompareExchange64_acq:1683return MSVCIntrin::_InterlockedCompareExchange_acq;1684case clang::AArch64::BI_InterlockedCompareExchange8_rel:1685case clang::AArch64::BI_InterlockedCompareExchange16_rel:1686case clang::AArch64::BI_InterlockedCompareExchange_rel:1687case clang::AArch64::BI_InterlockedCompareExchange64_rel:1688return MSVCIntrin::_InterlockedCompareExchange_rel;1689case clang::AArch64::BI_InterlockedCompareExchange8_nf:1690case clang::AArch64::BI_InterlockedCompareExchange16_nf:1691case clang::AArch64::BI_InterlockedCompareExchange_nf:1692case clang::AArch64::BI_InterlockedCompareExchange64_nf:1693return MSVCIntrin::_InterlockedCompareExchange_nf;1694case clang::AArch64::BI_InterlockedCompareExchange128:1695return MSVCIntrin::_InterlockedCompareExchange128;1696case clang::AArch64::BI_InterlockedCompareExchange128_acq:1697return MSVCIntrin::_InterlockedCompareExchange128_acq;1698case 
clang::AArch64::BI_InterlockedCompareExchange128_nf:1699return MSVCIntrin::_InterlockedCompareExchange128_nf;1700case clang::AArch64::BI_InterlockedCompareExchange128_rel:1701return MSVCIntrin::_InterlockedCompareExchange128_rel;1702case clang::AArch64::BI_InterlockedOr8_acq:1703case clang::AArch64::BI_InterlockedOr16_acq:1704case clang::AArch64::BI_InterlockedOr_acq:1705case clang::AArch64::BI_InterlockedOr64_acq:1706return MSVCIntrin::_InterlockedOr_acq;1707case clang::AArch64::BI_InterlockedOr8_rel:1708case clang::AArch64::BI_InterlockedOr16_rel:1709case clang::AArch64::BI_InterlockedOr_rel:1710case clang::AArch64::BI_InterlockedOr64_rel:1711return MSVCIntrin::_InterlockedOr_rel;1712case clang::AArch64::BI_InterlockedOr8_nf:1713case clang::AArch64::BI_InterlockedOr16_nf:1714case clang::AArch64::BI_InterlockedOr_nf:1715case clang::AArch64::BI_InterlockedOr64_nf:1716return MSVCIntrin::_InterlockedOr_nf;1717case clang::AArch64::BI_InterlockedXor8_acq:1718case clang::AArch64::BI_InterlockedXor16_acq:1719case clang::AArch64::BI_InterlockedXor_acq:1720case clang::AArch64::BI_InterlockedXor64_acq:1721return MSVCIntrin::_InterlockedXor_acq;1722case clang::AArch64::BI_InterlockedXor8_rel:1723case clang::AArch64::BI_InterlockedXor16_rel:1724case clang::AArch64::BI_InterlockedXor_rel:1725case clang::AArch64::BI_InterlockedXor64_rel:1726return MSVCIntrin::_InterlockedXor_rel;1727case clang::AArch64::BI_InterlockedXor8_nf:1728case clang::AArch64::BI_InterlockedXor16_nf:1729case clang::AArch64::BI_InterlockedXor_nf:1730case clang::AArch64::BI_InterlockedXor64_nf:1731return MSVCIntrin::_InterlockedXor_nf;1732case clang::AArch64::BI_InterlockedAnd8_acq:1733case clang::AArch64::BI_InterlockedAnd16_acq:1734case clang::AArch64::BI_InterlockedAnd_acq:1735case clang::AArch64::BI_InterlockedAnd64_acq:1736return MSVCIntrin::_InterlockedAnd_acq;1737case clang::AArch64::BI_InterlockedAnd8_rel:1738case clang::AArch64::BI_InterlockedAnd16_rel:1739case 
clang::AArch64::BI_InterlockedAnd_rel:1740case clang::AArch64::BI_InterlockedAnd64_rel:1741return MSVCIntrin::_InterlockedAnd_rel;1742case clang::AArch64::BI_InterlockedAnd8_nf:1743case clang::AArch64::BI_InterlockedAnd16_nf:1744case clang::AArch64::BI_InterlockedAnd_nf:1745case clang::AArch64::BI_InterlockedAnd64_nf:1746return MSVCIntrin::_InterlockedAnd_nf;1747case clang::AArch64::BI_InterlockedIncrement16_acq:1748case clang::AArch64::BI_InterlockedIncrement_acq:1749case clang::AArch64::BI_InterlockedIncrement64_acq:1750return MSVCIntrin::_InterlockedIncrement_acq;1751case clang::AArch64::BI_InterlockedIncrement16_rel:1752case clang::AArch64::BI_InterlockedIncrement_rel:1753case clang::AArch64::BI_InterlockedIncrement64_rel:1754return MSVCIntrin::_InterlockedIncrement_rel;1755case clang::AArch64::BI_InterlockedIncrement16_nf:1756case clang::AArch64::BI_InterlockedIncrement_nf:1757case clang::AArch64::BI_InterlockedIncrement64_nf:1758return MSVCIntrin::_InterlockedIncrement_nf;1759case clang::AArch64::BI_InterlockedDecrement16_acq:1760case clang::AArch64::BI_InterlockedDecrement_acq:1761case clang::AArch64::BI_InterlockedDecrement64_acq:1762return MSVCIntrin::_InterlockedDecrement_acq;1763case clang::AArch64::BI_InterlockedDecrement16_rel:1764case clang::AArch64::BI_InterlockedDecrement_rel:1765case clang::AArch64::BI_InterlockedDecrement64_rel:1766return MSVCIntrin::_InterlockedDecrement_rel;1767case clang::AArch64::BI_InterlockedDecrement16_nf:1768case clang::AArch64::BI_InterlockedDecrement_nf:1769case clang::AArch64::BI_InterlockedDecrement64_nf:1770return MSVCIntrin::_InterlockedDecrement_nf;1771}1772llvm_unreachable("must return from switch");1773}17741775static std::optional<CodeGenFunction::MSVCIntrin>1776translateX86ToMsvcIntrin(unsigned BuiltinID) {1777using MSVCIntrin = CodeGenFunction::MSVCIntrin;1778switch (BuiltinID) {1779default:1780return std::nullopt;1781case clang::X86::BI_BitScanForward:1782case clang::X86::BI_BitScanForward64:1783return 
MSVCIntrin::_BitScanForward;1784case clang::X86::BI_BitScanReverse:1785case clang::X86::BI_BitScanReverse64:1786return MSVCIntrin::_BitScanReverse;1787case clang::X86::BI_InterlockedAnd64:1788return MSVCIntrin::_InterlockedAnd;1789case clang::X86::BI_InterlockedCompareExchange128:1790return MSVCIntrin::_InterlockedCompareExchange128;1791case clang::X86::BI_InterlockedExchange64:1792return MSVCIntrin::_InterlockedExchange;1793case clang::X86::BI_InterlockedExchangeAdd64:1794return MSVCIntrin::_InterlockedExchangeAdd;1795case clang::X86::BI_InterlockedExchangeSub64:1796return MSVCIntrin::_InterlockedExchangeSub;1797case clang::X86::BI_InterlockedOr64:1798return MSVCIntrin::_InterlockedOr;1799case clang::X86::BI_InterlockedXor64:1800return MSVCIntrin::_InterlockedXor;1801case clang::X86::BI_InterlockedDecrement64:1802return MSVCIntrin::_InterlockedDecrement;1803case clang::X86::BI_InterlockedIncrement64:1804return MSVCIntrin::_InterlockedIncrement;1805}1806llvm_unreachable("must return from switch");1807}18081809// Emit an MSVC intrinsic. 
Assumes that arguments have *not* been evaluated.1810Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,1811const CallExpr *E) {1812switch (BuiltinID) {1813case MSVCIntrin::_BitScanForward:1814case MSVCIntrin::_BitScanReverse: {1815Address IndexAddress(EmitPointerWithAlignment(E->getArg(0)));1816Value *ArgValue = EmitScalarExpr(E->getArg(1));18171818llvm::Type *ArgType = ArgValue->getType();1819llvm::Type *IndexType = IndexAddress.getElementType();1820llvm::Type *ResultType = ConvertType(E->getType());18211822Value *ArgZero = llvm::Constant::getNullValue(ArgType);1823Value *ResZero = llvm::Constant::getNullValue(ResultType);1824Value *ResOne = llvm::ConstantInt::get(ResultType, 1);18251826BasicBlock *Begin = Builder.GetInsertBlock();1827BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);1828Builder.SetInsertPoint(End);1829PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");18301831Builder.SetInsertPoint(Begin);1832Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);1833BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);1834Builder.CreateCondBr(IsZero, End, NotZero);1835Result->addIncoming(ResZero, Begin);18361837Builder.SetInsertPoint(NotZero);18381839if (BuiltinID == MSVCIntrin::_BitScanForward) {1840Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);1841Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});1842ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);1843Builder.CreateStore(ZeroCount, IndexAddress, false);1844} else {1845unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();1846Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);18471848Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);1849Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});1850ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);1851Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, 
ZeroCount);1852Builder.CreateStore(Index, IndexAddress, false);1853}1854Builder.CreateBr(End);1855Result->addIncoming(ResOne, NotZero);18561857Builder.SetInsertPoint(End);1858return Result;1859}1860case MSVCIntrin::_InterlockedAnd:1861return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);1862case MSVCIntrin::_InterlockedExchange:1863return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);1864case MSVCIntrin::_InterlockedExchangeAdd:1865return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);1866case MSVCIntrin::_InterlockedExchangeSub:1867return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);1868case MSVCIntrin::_InterlockedOr:1869return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);1870case MSVCIntrin::_InterlockedXor:1871return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);1872case MSVCIntrin::_InterlockedExchangeAdd_acq:1873return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,1874AtomicOrdering::Acquire);1875case MSVCIntrin::_InterlockedExchangeAdd_rel:1876return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,1877AtomicOrdering::Release);1878case MSVCIntrin::_InterlockedExchangeAdd_nf:1879return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,1880AtomicOrdering::Monotonic);1881case MSVCIntrin::_InterlockedExchange_acq:1882return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,1883AtomicOrdering::Acquire);1884case MSVCIntrin::_InterlockedExchange_rel:1885return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,1886AtomicOrdering::Release);1887case MSVCIntrin::_InterlockedExchange_nf:1888return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,1889AtomicOrdering::Monotonic);1890case MSVCIntrin::_InterlockedCompareExchange_acq:1891return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);1892case MSVCIntrin::_InterlockedCompareExchange_rel:1893return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);1894case MSVCIntrin::_InterlockedCompareExchange_nf:1895return 
EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);1896case MSVCIntrin::_InterlockedCompareExchange128:1897return EmitAtomicCmpXchg128ForMSIntrin(1898*this, E, AtomicOrdering::SequentiallyConsistent);1899case MSVCIntrin::_InterlockedCompareExchange128_acq:1900return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);1901case MSVCIntrin::_InterlockedCompareExchange128_rel:1902return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);1903case MSVCIntrin::_InterlockedCompareExchange128_nf:1904return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);1905case MSVCIntrin::_InterlockedOr_acq:1906return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,1907AtomicOrdering::Acquire);1908case MSVCIntrin::_InterlockedOr_rel:1909return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,1910AtomicOrdering::Release);1911case MSVCIntrin::_InterlockedOr_nf:1912return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,1913AtomicOrdering::Monotonic);1914case MSVCIntrin::_InterlockedXor_acq:1915return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,1916AtomicOrdering::Acquire);1917case MSVCIntrin::_InterlockedXor_rel:1918return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,1919AtomicOrdering::Release);1920case MSVCIntrin::_InterlockedXor_nf:1921return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,1922AtomicOrdering::Monotonic);1923case MSVCIntrin::_InterlockedAnd_acq:1924return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,1925AtomicOrdering::Acquire);1926case MSVCIntrin::_InterlockedAnd_rel:1927return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,1928AtomicOrdering::Release);1929case MSVCIntrin::_InterlockedAnd_nf:1930return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,1931AtomicOrdering::Monotonic);1932case MSVCIntrin::_InterlockedIncrement_acq:1933return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);1934case MSVCIntrin::_InterlockedIncrement_rel:1935return 
EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);1936case MSVCIntrin::_InterlockedIncrement_nf:1937return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);1938case MSVCIntrin::_InterlockedDecrement_acq:1939return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);1940case MSVCIntrin::_InterlockedDecrement_rel:1941return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);1942case MSVCIntrin::_InterlockedDecrement_nf:1943return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);19441945case MSVCIntrin::_InterlockedDecrement:1946return EmitAtomicDecrementValue(*this, E);1947case MSVCIntrin::_InterlockedIncrement:1948return EmitAtomicIncrementValue(*this, E);19491950case MSVCIntrin::__fastfail: {1951// Request immediate process termination from the kernel. The instruction1952// sequences to do this are documented on MSDN:1953// https://msdn.microsoft.com/en-us/library/dn774154.aspx1954llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();1955StringRef Asm, Constraints;1956switch (ISA) {1957default:1958ErrorUnsupported(E, "__fastfail call for this architecture");1959break;1960case llvm::Triple::x86:1961case llvm::Triple::x86_64:1962Asm = "int $$0x29";1963Constraints = "{cx}";1964break;1965case llvm::Triple::thumb:1966Asm = "udf #251";1967Constraints = "{r0}";1968break;1969case llvm::Triple::aarch64:1970Asm = "brk #0xF003";1971Constraints = "{w0}";1972}1973llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);1974llvm::InlineAsm *IA =1975llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);1976llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(1977getLLVMContext(), llvm::AttributeList::FunctionIndex,1978llvm::Attribute::NoReturn);1979llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));1980CI->setAttributes(NoReturnAttr);1981return CI;1982}1983}1984llvm_unreachable("Incorrect MSVC intrinsic!");1985}19861987namespace {1988// ARC cleanup for 
__builtin_os_log_format1989struct CallObjCArcUse final : EHScopeStack::Cleanup {1990CallObjCArcUse(llvm::Value *object) : object(object) {}1991llvm::Value *object;19921993void Emit(CodeGenFunction &CGF, Flags flags) override {1994CGF.EmitARCIntrinsicUse(object);1995}1996};1997}19981999Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,2000BuiltinCheckKind Kind) {2001assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)2002&& "Unsupported builtin check kind");20032004Value *ArgValue = EmitScalarExpr(E);2005if (!SanOpts.has(SanitizerKind::Builtin))2006return ArgValue;20072008SanitizerScope SanScope(this);2009Value *Cond = Builder.CreateICmpNE(2010ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));2011EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),2012SanitizerHandler::InvalidBuiltin,2013{EmitCheckSourceLocation(E->getExprLoc()),2014llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},2015std::nullopt);2016return ArgValue;2017}20182019static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) {2020return CGF.Builder.CreateBinaryIntrinsic(2021Intrinsic::abs, ArgValue,2022ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW));2023}20242025static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E,2026bool SanitizeOverflow) {2027Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0));20282029// Try to eliminate overflow check.2030if (const auto *VCI = dyn_cast<llvm::ConstantInt>(ArgValue)) {2031if (!VCI->isMinSignedValue())2032return EmitAbs(CGF, ArgValue, true);2033}20342035CodeGenFunction::SanitizerScope SanScope(&CGF);20362037Constant *Zero = Constant::getNullValue(ArgValue->getType());2038Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic(2039Intrinsic::ssub_with_overflow, Zero, ArgValue);2040Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0);2041Value *NotOverflow = CGF.Builder.CreateNot(2042CGF.Builder.CreateExtractValue(ResultAndOverflow, 1));20432044// TODO: support 
-ftrapv-handler.2045if (SanitizeOverflow) {2046CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}},2047SanitizerHandler::NegateOverflow,2048{CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()),2049CGF.EmitCheckTypeDescriptor(E->getType())},2050{ArgValue});2051} else2052CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow);20532054Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond");2055return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs");2056}20572058/// Get the argument type for arguments to os_log_helper.2059static CanQualType getOSLogArgType(ASTContext &C, int Size) {2060QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);2061return C.getCanonicalType(UnsignedTy);2062}20632064llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(2065const analyze_os_log::OSLogBufferLayout &Layout,2066CharUnits BufferAlignment) {2067ASTContext &Ctx = getContext();20682069llvm::SmallString<64> Name;2070{2071raw_svector_ostream OS(Name);2072OS << "__os_log_helper";2073OS << "_" << BufferAlignment.getQuantity();2074OS << "_" << int(Layout.getSummaryByte());2075OS << "_" << int(Layout.getNumArgsByte());2076for (const auto &Item : Layout.Items)2077OS << "_" << int(Item.getSizeByte()) << "_"2078<< int(Item.getDescriptorByte());2079}20802081if (llvm::Function *F = CGM.getModule().getFunction(Name))2082return F;20832084llvm::SmallVector<QualType, 4> ArgTys;2085FunctionArgList Args;2086Args.push_back(ImplicitParamDecl::Create(2087Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,2088ImplicitParamKind::Other));2089ArgTys.emplace_back(Ctx.VoidPtrTy);20902091for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {2092char Size = Layout.Items[I].getSizeByte();2093if (!Size)2094continue;20952096QualType ArgTy = getOSLogArgType(Ctx, Size);2097Args.push_back(ImplicitParamDecl::Create(2098Ctx, nullptr, SourceLocation(),2099&Ctx.Idents.get(std::string("arg") + 
llvm::to_string(I)), ArgTy,2100ImplicitParamKind::Other));2101ArgTys.emplace_back(ArgTy);2102}21032104QualType ReturnTy = Ctx.VoidTy;21052106// The helper function has linkonce_odr linkage to enable the linker to merge2107// identical functions. To ensure the merging always happens, 'noinline' is2108// attached to the function when compiling with -Oz.2109const CGFunctionInfo &FI =2110CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);2111llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);2112llvm::Function *Fn = llvm::Function::Create(2113FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());2114Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);2115CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false);2116CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);2117Fn->setDoesNotThrow();21182119// Attach 'noinline' at -Oz.2120if (CGM.getCodeGenOpts().OptimizeSize == 2)2121Fn->addFnAttr(llvm::Attribute::NoInline);21222123auto NL = ApplyDebugLocation::CreateEmpty(*this);2124StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args);21252126// Create a scope with an artificial location for the body of this function.2127auto AL = ApplyDebugLocation::CreateArtificial(*this);21282129CharUnits Offset;2130Address BufAddr = makeNaturalAddressForPointer(2131Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy,2132BufferAlignment);2133Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),2134Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));2135Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),2136Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));21372138unsigned I = 1;2139for (const auto &Item : Layout.Items) {2140Builder.CreateStore(2141Builder.getInt8(Item.getDescriptorByte()),2142Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));2143Builder.CreateStore(2144Builder.getInt8(Item.getSizeByte()),2145Builder.CreateConstByteGEP(BufAddr, Offset++, 
"argSize"));21462147CharUnits Size = Item.size();2148if (!Size.getQuantity())2149continue;21502151Address Arg = GetAddrOfLocalVar(Args[I]);2152Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");2153Addr = Addr.withElementType(Arg.getElementType());2154Builder.CreateStore(Builder.CreateLoad(Arg), Addr);2155Offset += Size;2156++I;2157}21582159FinishFunction();21602161return Fn;2162}21632164RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {2165assert(E.getNumArgs() >= 2 &&2166"__builtin_os_log_format takes at least 2 arguments");2167ASTContext &Ctx = getContext();2168analyze_os_log::OSLogBufferLayout Layout;2169analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);2170Address BufAddr = EmitPointerWithAlignment(E.getArg(0));2171llvm::SmallVector<llvm::Value *, 4> RetainableOperands;21722173// Ignore argument 1, the format string. It is not currently used.2174CallArgList Args;2175Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy);21762177for (const auto &Item : Layout.Items) {2178int Size = Item.getSizeByte();2179if (!Size)2180continue;21812182llvm::Value *ArgVal;21832184if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {2185uint64_t Val = 0;2186for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)2187Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;2188ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));2189} else if (const Expr *TheExpr = Item.getExpr()) {2190ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);21912192// If a temporary object that requires destruction after the full2193// expression is passed, push a lifetime-extended cleanup to extend its2194// lifetime to the end of the enclosing block scope.2195auto LifetimeExtendObject = [&](const Expr *E) {2196E = E->IgnoreParenCasts();2197// Extend lifetimes of objects returned by function calls and message2198// sends.21992200// FIXME: We should do this in other cases in which temporaries are2201// created 
including arguments of non-ARC types (e.g., C++2202// temporaries).2203if (isa<CallExpr>(E) || isa<ObjCMessageExpr>(E))2204return true;2205return false;2206};22072208if (TheExpr->getType()->isObjCRetainableType() &&2209getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) {2210assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&2211"Only scalar can be a ObjC retainable type");2212if (!isa<Constant>(ArgVal)) {2213CleanupKind Cleanup = getARCCleanupKind();2214QualType Ty = TheExpr->getType();2215RawAddress Alloca = RawAddress::invalid();2216RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca);2217ArgVal = EmitARCRetain(Ty, ArgVal);2218Builder.CreateStore(ArgVal, Addr);2219pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty,2220CodeGenFunction::destroyARCStrongPrecise,2221Cleanup & EHCleanup);22222223// Push a clang.arc.use call to ensure ARC optimizer knows that the2224// argument has to be alive.2225if (CGM.getCodeGenOpts().OptimizationLevel != 0)2226pushCleanupAfterFullExpr<CallObjCArcUse>(Cleanup, ArgVal);2227}2228}2229} else {2230ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());2231}22322233unsigned ArgValSize =2234CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());2235llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),2236ArgValSize);2237ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);2238CanQualType ArgTy = getOSLogArgType(Ctx, Size);2239// If ArgVal has type x86_fp80, zero-extend ArgVal.2240ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));2241Args.add(RValue::get(ArgVal), ArgTy);2242}22432244const CGFunctionInfo &FI =2245CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);2246llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(2247Layout, BufAddr.getAlignment());2248EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);2249return RValue::get(BufAddr, *this);2250}22512252static bool isSpecialUnsignedMultiplySignedResult(2253unsigned BuiltinID, 
WidthAndSignedness Op1Info, WidthAndSignedness Op2Info,2254WidthAndSignedness ResultInfo) {2255return BuiltinID == Builtin::BI__builtin_mul_overflow &&2256Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width &&2257!Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed;2258}22592260static RValue EmitCheckedUnsignedMultiplySignedResult(2261CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info,2262const clang::Expr *Op2, WidthAndSignedness Op2Info,2263const clang::Expr *ResultArg, QualType ResultQTy,2264WidthAndSignedness ResultInfo) {2265assert(isSpecialUnsignedMultiplySignedResult(2266Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) &&2267"Cannot specialize this multiply");22682269llvm::Value *V1 = CGF.EmitScalarExpr(Op1);2270llvm::Value *V2 = CGF.EmitScalarExpr(Op2);22712272llvm::Value *HasOverflow;2273llvm::Value *Result = EmitOverflowIntrinsic(2274CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow);22752276// The intrinsic call will detect overflow when the value is > UINT_MAX,2277// however, since the original builtin had a signed result, we need to report2278// an overflow when the result is greater than INT_MAX.2279auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width);2280llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax);22812282llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue);2283HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow);22842285bool isVolatile =2286ResultArg->getType()->getPointeeType().isVolatileQualified();2287Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);2288CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,2289isVolatile);2290return RValue::get(HasOverflow);2291}22922293/// Determine if a binop is a checked mixed-sign multiply we can specialize.2294static bool isSpecialMixedSignMultiply(unsigned BuiltinID,2295WidthAndSignedness Op1Info,2296WidthAndSignedness 
Op2Info,2297WidthAndSignedness ResultInfo) {2298return BuiltinID == Builtin::BI__builtin_mul_overflow &&2299std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&2300Op1Info.Signed != Op2Info.Signed;2301}23022303/// Emit a checked mixed-sign multiply. This is a cheaper specialization of2304/// the generic checked-binop irgen.2305static RValue2306EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,2307WidthAndSignedness Op1Info, const clang::Expr *Op2,2308WidthAndSignedness Op2Info,2309const clang::Expr *ResultArg, QualType ResultQTy,2310WidthAndSignedness ResultInfo) {2311assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,2312Op2Info, ResultInfo) &&2313"Not a mixed-sign multipliction we can specialize");23142315// Emit the signed and unsigned operands.2316const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;2317const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;2318llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);2319llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);2320unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;2321unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;23222323// One of the operands may be smaller than the other. 
If so, [s|z]ext it.2324if (SignedOpWidth < UnsignedOpWidth)2325Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");2326if (UnsignedOpWidth < SignedOpWidth)2327Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");23282329llvm::Type *OpTy = Signed->getType();2330llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);2331Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);2332llvm::Type *ResTy = ResultPtr.getElementType();2333unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);23342335// Take the absolute value of the signed operand.2336llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);2337llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);2338llvm::Value *AbsSigned =2339CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);23402341// Perform a checked unsigned multiplication.2342llvm::Value *UnsignedOverflow;2343llvm::Value *UnsignedResult =2344EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,2345Unsigned, UnsignedOverflow);23462347llvm::Value *Overflow, *Result;2348if (ResultInfo.Signed) {2349// Signed overflow occurs if the result is greater than INT_MAX or lesser2350// than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).2351auto IntMax =2352llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth);2353llvm::Value *MaxResult =2354CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),2355CGF.Builder.CreateZExt(IsNegative, OpTy));2356llvm::Value *SignedOverflow =2357CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);2358Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);23592360// Prepare the signed result (possibly by negating it).2361llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);2362llvm::Value *SignedResult =2363CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);2364Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);2365} else {2366// Unsigned overflow occurs if the result is 
< 0 or greater than UINT_MAX.2367llvm::Value *Underflow = CGF.Builder.CreateAnd(2368IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));2369Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);2370if (ResultInfo.Width < OpWidth) {2371auto IntMax =2372llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);2373llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(2374UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));2375Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);2376}23772378// Negate the product if it would be negative in infinite precision.2379Result = CGF.Builder.CreateSelect(2380IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);23812382Result = CGF.Builder.CreateTrunc(Result, ResTy);2383}2384assert(Overflow && Result && "Missing overflow or result");23852386bool isVolatile =2387ResultArg->getType()->getPointeeType().isVolatileQualified();2388CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,2389isVolatile);2390return RValue::get(Overflow);2391}23922393static bool2394TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,2395llvm::SmallPtrSetImpl<const Decl *> &Seen) {2396if (const auto *Arr = Ctx.getAsArrayType(Ty))2397Ty = Ctx.getBaseElementType(Arr);23982399const auto *Record = Ty->getAsCXXRecordDecl();2400if (!Record)2401return false;24022403// We've already checked this type, or are in the process of checking it.2404if (!Seen.insert(Record).second)2405return false;24062407assert(Record->hasDefinition() &&2408"Incomplete types should already be diagnosed");24092410if (Record->isDynamicClass())2411return true;24122413for (FieldDecl *F : Record->fields()) {2414if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))2415return true;2416}2417return false;2418}24192420/// Determine if the specified type requires laundering by checking if it is a2421/// dynamic class type or contains a subobject which is a dynamic class type.2422static bool TypeRequiresBuiltinLaunder(CodeGenModule 
&CGM, QualType Ty) {2423if (!CGM.getCodeGenOpts().StrictVTablePointers)2424return false;2425llvm::SmallPtrSet<const Decl *, 16> Seen;2426return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);2427}24282429RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {2430llvm::Value *Src = EmitScalarExpr(E->getArg(0));2431llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));24322433// The builtin's shift arg may have a different type than the source arg and2434// result, but the LLVM intrinsic uses the same type for all values.2435llvm::Type *Ty = Src->getType();2436ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);24372438// Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.2439unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;2440Function *F = CGM.getIntrinsic(IID, Ty);2441return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));2442}24432444// Map math builtins for long-double to f128 version.2445static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) {2446switch (BuiltinID) {2447#define MUTATE_LDBL(func) \2448case Builtin::BI__builtin_##func##l: \2449return 
Builtin::BI__builtin_##func##f128;2450MUTATE_LDBL(sqrt)2451MUTATE_LDBL(cbrt)2452MUTATE_LDBL(fabs)2453MUTATE_LDBL(log)2454MUTATE_LDBL(log2)2455MUTATE_LDBL(log10)2456MUTATE_LDBL(log1p)2457MUTATE_LDBL(logb)2458MUTATE_LDBL(exp)2459MUTATE_LDBL(exp2)2460MUTATE_LDBL(expm1)2461MUTATE_LDBL(fdim)2462MUTATE_LDBL(hypot)2463MUTATE_LDBL(ilogb)2464MUTATE_LDBL(pow)2465MUTATE_LDBL(fmin)2466MUTATE_LDBL(fmax)2467MUTATE_LDBL(ceil)2468MUTATE_LDBL(trunc)2469MUTATE_LDBL(rint)2470MUTATE_LDBL(nearbyint)2471MUTATE_LDBL(round)2472MUTATE_LDBL(floor)2473MUTATE_LDBL(lround)2474MUTATE_LDBL(llround)2475MUTATE_LDBL(lrint)2476MUTATE_LDBL(llrint)2477MUTATE_LDBL(fmod)2478MUTATE_LDBL(modf)2479MUTATE_LDBL(nan)2480MUTATE_LDBL(nans)2481MUTATE_LDBL(inf)2482MUTATE_LDBL(fma)2483MUTATE_LDBL(sin)2484MUTATE_LDBL(cos)2485MUTATE_LDBL(tan)2486MUTATE_LDBL(sinh)2487MUTATE_LDBL(cosh)2488MUTATE_LDBL(tanh)2489MUTATE_LDBL(asin)2490MUTATE_LDBL(acos)2491MUTATE_LDBL(atan)2492MUTATE_LDBL(asinh)2493MUTATE_LDBL(acosh)2494MUTATE_LDBL(atanh)2495MUTATE_LDBL(atan2)2496MUTATE_LDBL(erf)2497MUTATE_LDBL(erfc)2498MUTATE_LDBL(ldexp)2499MUTATE_LDBL(frexp)2500MUTATE_LDBL(huge_val)2501MUTATE_LDBL(copysign)2502MUTATE_LDBL(nextafter)2503MUTATE_LDBL(nexttoward)2504MUTATE_LDBL(remainder)2505MUTATE_LDBL(remquo)2506MUTATE_LDBL(scalbln)2507MUTATE_LDBL(scalbn)2508MUTATE_LDBL(tgamma)2509MUTATE_LDBL(lgamma)2510#undef MUTATE_LDBL2511default:2512return BuiltinID;2513}2514}25152516static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID,2517Value *V) {2518if (CGF.Builder.getIsFPConstrained() &&2519CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) {2520if (Value *Result =2521CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM))2522return Result;2523}2524return nullptr;2525}25262527static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,2528const FunctionDecl *FD) {2529auto Name = FD->getNameAsString() + "__hipstdpar_unsupported";2530auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD);2531auto UBF = 
CGF->CGM.getModule().getOrInsertFunction(Name, FnTy);25322533SmallVector<Value *, 16> Args;2534for (auto &&FormalTy : FnTy->params())2535Args.push_back(llvm::PoisonValue::get(FormalTy));25362537return RValue::get(CGF->Builder.CreateCall(UBF, Args));2538}25392540RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,2541const CallExpr *E,2542ReturnValueSlot ReturnValue) {2543const FunctionDecl *FD = GD.getDecl()->getAsFunction();2544// See if we can constant fold this builtin. If so, don't emit it at all.2545// TODO: Extend this handling to all builtin calls that we can constant-fold.2546Expr::EvalResult Result;2547if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) &&2548!Result.hasSideEffects()) {2549if (Result.Val.isInt())2550return RValue::get(llvm::ConstantInt::get(getLLVMContext(),2551Result.Val.getInt()));2552if (Result.Val.isFloat())2553return RValue::get(llvm::ConstantFP::get(getLLVMContext(),2554Result.Val.getFloat()));2555}25562557// If current long-double semantics is IEEE 128-bit, replace math builtins2558// of long-double with f128 equivalent.2559// TODO: This mutation should also be applied to other targets other than PPC,2560// after backend supports IEEE 128-bit style libcalls.2561if (getTarget().getTriple().isPPC64() &&2562&getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad())2563BuiltinID = mutateLongDoubleBuiltin(BuiltinID);25642565// If the builtin has been declared explicitly with an assembler label,2566// disable the specialized emitting below. Ideally we should communicate the2567// rename in IR, or at least avoid generating the intrinsic calls that are2568// likely to get lowered to the renamed library functions.2569const unsigned BuiltinIDIfNoAsmLabel =2570FD->hasAttr<AsmLabelAttr>() ? 0 : BuiltinID;25712572std::optional<bool> ErrnoOverriden;2573// ErrnoOverriden is true if math-errno is overriden via the2574// '#pragma float_control(precise, on)'. 
This pragma disables fast-math,2575// which implies math-errno.2576if (E->hasStoredFPFeatures()) {2577FPOptionsOverride OP = E->getFPFeatures();2578if (OP.hasMathErrnoOverride())2579ErrnoOverriden = OP.getMathErrnoOverride();2580}2581// True if 'attribute__((optnone))' is used. This attribute overrides2582// fast-math which implies math-errno.2583bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr<OptimizeNoneAttr>();25842585// True if we are compiling at -O2 and errno has been disabled2586// using the '#pragma float_control(precise, off)', and2587// attribute opt-none hasn't been seen.2588bool ErrnoOverridenToFalseWithOpt =2589ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone &&2590CGM.getCodeGenOpts().OptimizationLevel != 0;25912592// There are LLVM math intrinsics/instructions corresponding to math library2593// functions except the LLVM op will never set errno while the math library2594// might. Also, math builtins have the same semantics as their math library2595// twins. Thus, we can transform math library and builtin calls to their2596// LLVM counterparts if the call is marked 'const' (known to never set errno).2597// In case FP exceptions are enabled, the experimental versions of the2598// intrinsics model those.2599bool ConstAlways =2600getContext().BuiltinInfo.isConst(BuiltinID);26012602// There's a special case with the fma builtins where they are always const2603// if the target environment is GNU or the target is OS is Windows and we're2604// targeting the MSVCRT.dll environment.2605// FIXME: This list can be become outdated. 
Need to find a way to get it some2606// other way.2607switch (BuiltinID) {2608case Builtin::BI__builtin_fma:2609case Builtin::BI__builtin_fmaf:2610case Builtin::BI__builtin_fmal:2611case Builtin::BI__builtin_fmaf16:2612case Builtin::BIfma:2613case Builtin::BIfmaf:2614case Builtin::BIfmal: {2615auto &Trip = CGM.getTriple();2616if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT())2617ConstAlways = true;2618break;2619}2620default:2621break;2622}26232624bool ConstWithoutErrnoAndExceptions =2625getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID);2626bool ConstWithoutExceptions =2627getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID);26282629// ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is2630// disabled.2631// Math intrinsics are generated only when math-errno is disabled. Any pragmas2632// or attributes that affect math-errno should prevent or allow math2633// intrincs to be generated. Intrinsics are generated:2634// 1- In fast math mode, unless math-errno is overriden2635// via '#pragma float_control(precise, on)', or via an2636// 'attribute__((optnone))'.2637// 2- If math-errno was enabled on command line but overriden2638// to false via '#pragma float_control(precise, off))' and2639// 'attribute__((optnone))' hasn't been used.2640// 3- If we are compiling with optimization and errno has been disabled2641// via '#pragma float_control(precise, off)', and2642// 'attribute__((optnone))' hasn't been used.26432644bool ConstWithoutErrnoOrExceptions =2645ConstWithoutErrnoAndExceptions || ConstWithoutExceptions;2646bool GenerateIntrinsics =2647(ConstAlways && !OptNone) ||2648(!getLangOpts().MathErrno &&2649!(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);2650if (!GenerateIntrinsics) {2651GenerateIntrinsics =2652ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions;2653if (!GenerateIntrinsics)2654GenerateIntrinsics =2655ConstWithoutErrnoOrExceptions &&2656(!getLangOpts().MathErrno 
&&2657!(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone);2658if (!GenerateIntrinsics)2659GenerateIntrinsics =2660ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;2661}2662if (GenerateIntrinsics) {2663switch (BuiltinIDIfNoAsmLabel) {2664case Builtin::BIacos:2665case Builtin::BIacosf:2666case Builtin::BIacosl:2667case Builtin::BI__builtin_acos:2668case Builtin::BI__builtin_acosf:2669case Builtin::BI__builtin_acosf16:2670case Builtin::BI__builtin_acosl:2671case Builtin::BI__builtin_acosf128:2672return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(2673*this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos));26742675case Builtin::BIasin:2676case Builtin::BIasinf:2677case Builtin::BIasinl:2678case Builtin::BI__builtin_asin:2679case Builtin::BI__builtin_asinf:2680case Builtin::BI__builtin_asinf16:2681case Builtin::BI__builtin_asinl:2682case Builtin::BI__builtin_asinf128:2683return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(2684*this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin));26852686case Builtin::BIatan:2687case Builtin::BIatanf:2688case Builtin::BIatanl:2689case Builtin::BI__builtin_atan:2690case Builtin::BI__builtin_atanf:2691case Builtin::BI__builtin_atanf16:2692case Builtin::BI__builtin_atanl:2693case Builtin::BI__builtin_atanf128:2694return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(2695*this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan));26962697case Builtin::BIceil:2698case Builtin::BIceilf:2699case Builtin::BIceill:2700case Builtin::BI__builtin_ceil:2701case Builtin::BI__builtin_ceilf:2702case Builtin::BI__builtin_ceilf16:2703case Builtin::BI__builtin_ceill:2704case Builtin::BI__builtin_ceilf128:2705return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2706Intrinsic::ceil,2707Intrinsic::experimental_constrained_ceil));27082709case Builtin::BIcopysign:2710case Builtin::BIcopysignf:2711case Builtin::BIcopysignl:2712case Builtin::BI__builtin_copysign:2713case 
Builtin::BI__builtin_copysignf:2714case Builtin::BI__builtin_copysignf16:2715case Builtin::BI__builtin_copysignl:2716case Builtin::BI__builtin_copysignf128:2717return RValue::get(2718emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign));27192720case Builtin::BIcos:2721case Builtin::BIcosf:2722case Builtin::BIcosl:2723case Builtin::BI__builtin_cos:2724case Builtin::BI__builtin_cosf:2725case Builtin::BI__builtin_cosf16:2726case Builtin::BI__builtin_cosl:2727case Builtin::BI__builtin_cosf128:2728return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2729Intrinsic::cos,2730Intrinsic::experimental_constrained_cos));27312732case Builtin::BIcosh:2733case Builtin::BIcoshf:2734case Builtin::BIcoshl:2735case Builtin::BI__builtin_cosh:2736case Builtin::BI__builtin_coshf:2737case Builtin::BI__builtin_coshf16:2738case Builtin::BI__builtin_coshl:2739case Builtin::BI__builtin_coshf128:2740return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(2741*this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh));27422743case Builtin::BIexp:2744case Builtin::BIexpf:2745case Builtin::BIexpl:2746case Builtin::BI__builtin_exp:2747case Builtin::BI__builtin_expf:2748case Builtin::BI__builtin_expf16:2749case Builtin::BI__builtin_expl:2750case Builtin::BI__builtin_expf128:2751return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2752Intrinsic::exp,2753Intrinsic::experimental_constrained_exp));27542755case Builtin::BIexp2:2756case Builtin::BIexp2f:2757case Builtin::BIexp2l:2758case Builtin::BI__builtin_exp2:2759case Builtin::BI__builtin_exp2f:2760case Builtin::BI__builtin_exp2f16:2761case Builtin::BI__builtin_exp2l:2762case Builtin::BI__builtin_exp2f128:2763return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2764Intrinsic::exp2,2765Intrinsic::experimental_constrained_exp2));2766case Builtin::BI__builtin_exp10:2767case Builtin::BI__builtin_exp10f:2768case Builtin::BI__builtin_exp10f16:2769case Builtin::BI__builtin_exp10l:2770case 
Builtin::BI__builtin_exp10f128: {2771// TODO: strictfp support2772if (Builder.getIsFPConstrained())2773break;2774return RValue::get(2775emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10));2776}2777case Builtin::BIfabs:2778case Builtin::BIfabsf:2779case Builtin::BIfabsl:2780case Builtin::BI__builtin_fabs:2781case Builtin::BI__builtin_fabsf:2782case Builtin::BI__builtin_fabsf16:2783case Builtin::BI__builtin_fabsl:2784case Builtin::BI__builtin_fabsf128:2785return RValue::get(2786emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs));27872788case Builtin::BIfloor:2789case Builtin::BIfloorf:2790case Builtin::BIfloorl:2791case Builtin::BI__builtin_floor:2792case Builtin::BI__builtin_floorf:2793case Builtin::BI__builtin_floorf16:2794case Builtin::BI__builtin_floorl:2795case Builtin::BI__builtin_floorf128:2796return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2797Intrinsic::floor,2798Intrinsic::experimental_constrained_floor));27992800case Builtin::BIfma:2801case Builtin::BIfmaf:2802case Builtin::BIfmal:2803case Builtin::BI__builtin_fma:2804case Builtin::BI__builtin_fmaf:2805case Builtin::BI__builtin_fmaf16:2806case Builtin::BI__builtin_fmal:2807case Builtin::BI__builtin_fmaf128:2808return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E,2809Intrinsic::fma,2810Intrinsic::experimental_constrained_fma));28112812case Builtin::BIfmax:2813case Builtin::BIfmaxf:2814case Builtin::BIfmaxl:2815case Builtin::BI__builtin_fmax:2816case Builtin::BI__builtin_fmaxf:2817case Builtin::BI__builtin_fmaxf16:2818case Builtin::BI__builtin_fmaxl:2819case Builtin::BI__builtin_fmaxf128:2820return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,2821Intrinsic::maxnum,2822Intrinsic::experimental_constrained_maxnum));28232824case Builtin::BIfmin:2825case Builtin::BIfminf:2826case Builtin::BIfminl:2827case Builtin::BI__builtin_fmin:2828case Builtin::BI__builtin_fminf:2829case Builtin::BI__builtin_fminf16:2830case Builtin::BI__builtin_fminl:2831case 
Builtin::BI__builtin_fminf128:2832return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,2833Intrinsic::minnum,2834Intrinsic::experimental_constrained_minnum));28352836// fmod() is a special-case. It maps to the frem instruction rather than an2837// LLVM intrinsic.2838case Builtin::BIfmod:2839case Builtin::BIfmodf:2840case Builtin::BIfmodl:2841case Builtin::BI__builtin_fmod:2842case Builtin::BI__builtin_fmodf:2843case Builtin::BI__builtin_fmodf16:2844case Builtin::BI__builtin_fmodl:2845case Builtin::BI__builtin_fmodf128: {2846CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);2847Value *Arg1 = EmitScalarExpr(E->getArg(0));2848Value *Arg2 = EmitScalarExpr(E->getArg(1));2849return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));2850}28512852case Builtin::BIlog:2853case Builtin::BIlogf:2854case Builtin::BIlogl:2855case Builtin::BI__builtin_log:2856case Builtin::BI__builtin_logf:2857case Builtin::BI__builtin_logf16:2858case Builtin::BI__builtin_logl:2859case Builtin::BI__builtin_logf128:2860return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2861Intrinsic::log,2862Intrinsic::experimental_constrained_log));28632864case Builtin::BIlog10:2865case Builtin::BIlog10f:2866case Builtin::BIlog10l:2867case Builtin::BI__builtin_log10:2868case Builtin::BI__builtin_log10f:2869case Builtin::BI__builtin_log10f16:2870case Builtin::BI__builtin_log10l:2871case Builtin::BI__builtin_log10f128:2872return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2873Intrinsic::log10,2874Intrinsic::experimental_constrained_log10));28752876case Builtin::BIlog2:2877case Builtin::BIlog2f:2878case Builtin::BIlog2l:2879case Builtin::BI__builtin_log2:2880case Builtin::BI__builtin_log2f:2881case Builtin::BI__builtin_log2f16:2882case Builtin::BI__builtin_log2l:2883case Builtin::BI__builtin_log2f128:2884return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2885Intrinsic::log2,2886Intrinsic::experimental_constrained_log2));28872888case Builtin::BInearbyint:2889case 
Builtin::BInearbyintf:2890case Builtin::BInearbyintl:2891case Builtin::BI__builtin_nearbyint:2892case Builtin::BI__builtin_nearbyintf:2893case Builtin::BI__builtin_nearbyintl:2894case Builtin::BI__builtin_nearbyintf128:2895return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2896Intrinsic::nearbyint,2897Intrinsic::experimental_constrained_nearbyint));28982899case Builtin::BIpow:2900case Builtin::BIpowf:2901case Builtin::BIpowl:2902case Builtin::BI__builtin_pow:2903case Builtin::BI__builtin_powf:2904case Builtin::BI__builtin_powf16:2905case Builtin::BI__builtin_powl:2906case Builtin::BI__builtin_powf128:2907return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E,2908Intrinsic::pow,2909Intrinsic::experimental_constrained_pow));29102911case Builtin::BIrint:2912case Builtin::BIrintf:2913case Builtin::BIrintl:2914case Builtin::BI__builtin_rint:2915case Builtin::BI__builtin_rintf:2916case Builtin::BI__builtin_rintf16:2917case Builtin::BI__builtin_rintl:2918case Builtin::BI__builtin_rintf128:2919return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2920Intrinsic::rint,2921Intrinsic::experimental_constrained_rint));29222923case Builtin::BIround:2924case Builtin::BIroundf:2925case Builtin::BIroundl:2926case Builtin::BI__builtin_round:2927case Builtin::BI__builtin_roundf:2928case Builtin::BI__builtin_roundf16:2929case Builtin::BI__builtin_roundl:2930case Builtin::BI__builtin_roundf128:2931return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2932Intrinsic::round,2933Intrinsic::experimental_constrained_round));29342935case Builtin::BIroundeven:2936case Builtin::BIroundevenf:2937case Builtin::BIroundevenl:2938case Builtin::BI__builtin_roundeven:2939case Builtin::BI__builtin_roundevenf:2940case Builtin::BI__builtin_roundevenf16:2941case Builtin::BI__builtin_roundevenl:2942case Builtin::BI__builtin_roundevenf128:2943return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, 
E,2944Intrinsic::roundeven,2945Intrinsic::experimental_constrained_roundeven));29462947case Builtin::BIsin:2948case Builtin::BIsinf:2949case Builtin::BIsinl:2950case Builtin::BI__builtin_sin:2951case Builtin::BI__builtin_sinf:2952case Builtin::BI__builtin_sinf16:2953case Builtin::BI__builtin_sinl:2954case Builtin::BI__builtin_sinf128:2955return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,2956Intrinsic::sin,2957Intrinsic::experimental_constrained_sin));29582959case Builtin::BIsinh:2960case Builtin::BIsinhf:2961case Builtin::BIsinhl:2962case Builtin::BI__builtin_sinh:2963case Builtin::BI__builtin_sinhf:2964case Builtin::BI__builtin_sinhf16:2965case Builtin::BI__builtin_sinhl:2966case Builtin::BI__builtin_sinhf128:2967return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(2968*this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh));29692970case Builtin::BIsqrt:2971case Builtin::BIsqrtf:2972case Builtin::BIsqrtl:2973case Builtin::BI__builtin_sqrt:2974case Builtin::BI__builtin_sqrtf:2975case Builtin::BI__builtin_sqrtf16:2976case Builtin::BI__builtin_sqrtl:2977case Builtin::BI__builtin_sqrtf128:2978case Builtin::BI__builtin_elementwise_sqrt: {2979llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(2980*this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);2981SetSqrtFPAccuracy(Call);2982return RValue::get(Call);2983}29842985case Builtin::BItan:2986case Builtin::BItanf:2987case Builtin::BItanl:2988case Builtin::BI__builtin_tan:2989case Builtin::BI__builtin_tanf:2990case Builtin::BI__builtin_tanf16:2991case Builtin::BI__builtin_tanl:2992case Builtin::BI__builtin_tanf128:2993return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(2994*this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan));29952996case Builtin::BItanh:2997case Builtin::BItanhf:2998case Builtin::BItanhl:2999case Builtin::BI__builtin_tanh:3000case Builtin::BI__builtin_tanhf:3001case Builtin::BI__builtin_tanhf16:3002case Builtin::BI__builtin_tanhl:3003case 
Builtin::BI__builtin_tanhf128:3004return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(3005*this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh));30063007case Builtin::BItrunc:3008case Builtin::BItruncf:3009case Builtin::BItruncl:3010case Builtin::BI__builtin_trunc:3011case Builtin::BI__builtin_truncf:3012case Builtin::BI__builtin_truncf16:3013case Builtin::BI__builtin_truncl:3014case Builtin::BI__builtin_truncf128:3015return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,3016Intrinsic::trunc,3017Intrinsic::experimental_constrained_trunc));30183019case Builtin::BIlround:3020case Builtin::BIlroundf:3021case Builtin::BIlroundl:3022case Builtin::BI__builtin_lround:3023case Builtin::BI__builtin_lroundf:3024case Builtin::BI__builtin_lroundl:3025case Builtin::BI__builtin_lroundf128:3026return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(3027*this, E, Intrinsic::lround,3028Intrinsic::experimental_constrained_lround));30293030case Builtin::BIllround:3031case Builtin::BIllroundf:3032case Builtin::BIllroundl:3033case Builtin::BI__builtin_llround:3034case Builtin::BI__builtin_llroundf:3035case Builtin::BI__builtin_llroundl:3036case Builtin::BI__builtin_llroundf128:3037return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(3038*this, E, Intrinsic::llround,3039Intrinsic::experimental_constrained_llround));30403041case Builtin::BIlrint:3042case Builtin::BIlrintf:3043case Builtin::BIlrintl:3044case Builtin::BI__builtin_lrint:3045case Builtin::BI__builtin_lrintf:3046case Builtin::BI__builtin_lrintl:3047case Builtin::BI__builtin_lrintf128:3048return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(3049*this, E, Intrinsic::lrint,3050Intrinsic::experimental_constrained_lrint));30513052case Builtin::BIllrint:3053case Builtin::BIllrintf:3054case Builtin::BIllrintl:3055case Builtin::BI__builtin_llrint:3056case Builtin::BI__builtin_llrintf:3057case Builtin::BI__builtin_llrintl:3058case Builtin::BI__builtin_llrintf128:3059return 
RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin(3060*this, E, Intrinsic::llrint,3061Intrinsic::experimental_constrained_llrint));3062case Builtin::BI__builtin_ldexp:3063case Builtin::BI__builtin_ldexpf:3064case Builtin::BI__builtin_ldexpl:3065case Builtin::BI__builtin_ldexpf16:3066case Builtin::BI__builtin_ldexpf128: {3067return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin(3068*this, E, Intrinsic::ldexp,3069Intrinsic::experimental_constrained_ldexp));3070}3071default:3072break;3073}3074}30753076// Check NonnullAttribute/NullabilityArg and Alignment.3077auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg,3078unsigned ParmNum) {3079Value *Val = A.emitRawPointer(*this);3080EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD,3081ParmNum);30823083if (SanOpts.has(SanitizerKind::Alignment)) {3084SanitizerSet SkippedChecks;3085SkippedChecks.set(SanitizerKind::All);3086SkippedChecks.clear(SanitizerKind::Alignment);3087SourceLocation Loc = Arg->getExprLoc();3088// Strip an implicit cast.3089if (auto *CE = dyn_cast<ImplicitCastExpr>(Arg))3090if (CE->getCastKind() == CK_BitCast)3091Arg = CE->getSubExpr();3092EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(),3093SkippedChecks);3094}3095};30963097switch (BuiltinIDIfNoAsmLabel) {3098default: break;3099case Builtin::BI__builtin___CFStringMakeConstantString:3100case Builtin::BI__builtin___NSStringMakeConstantString:3101return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));3102case Builtin::BI__builtin_stdarg_start:3103case Builtin::BI__builtin_va_start:3104case Builtin::BI__va_start:3105case Builtin::BI__builtin_va_end:3106EmitVAStartEnd(BuiltinID == Builtin::BI__va_start3107? 
EmitScalarExpr(E->getArg(0))3108: EmitVAListRef(E->getArg(0)).emitRawPointer(*this),3109BuiltinID != Builtin::BI__builtin_va_end);3110return RValue::get(nullptr);3111case Builtin::BI__builtin_va_copy: {3112Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this);3113Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this);3114Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}),3115{DstPtr, SrcPtr});3116return RValue::get(nullptr);3117}3118case Builtin::BIabs:3119case Builtin::BIlabs:3120case Builtin::BIllabs:3121case Builtin::BI__builtin_abs:3122case Builtin::BI__builtin_labs:3123case Builtin::BI__builtin_llabs: {3124bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow);31253126Value *Result;3127switch (getLangOpts().getSignedOverflowBehavior()) {3128case LangOptions::SOB_Defined:3129Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false);3130break;3131case LangOptions::SOB_Undefined:3132if (!SanitizeOverflow) {3133Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true);3134break;3135}3136[[fallthrough]];3137case LangOptions::SOB_Trapping:3138// TODO: Somehow handle the corner case when the address of abs is taken.3139Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow);3140break;3141}3142return RValue::get(Result);3143}3144case Builtin::BI__builtin_complex: {3145Value *Real = EmitScalarExpr(E->getArg(0));3146Value *Imag = EmitScalarExpr(E->getArg(1));3147return RValue::getComplex({Real, Imag});3148}3149case Builtin::BI__builtin_conj:3150case Builtin::BI__builtin_conjf:3151case Builtin::BI__builtin_conjl:3152case Builtin::BIconj:3153case Builtin::BIconjf:3154case Builtin::BIconjl: {3155ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));3156Value *Real = ComplexVal.first;3157Value *Imag = ComplexVal.second;3158Imag = Builder.CreateFNeg(Imag, "neg");3159return RValue::getComplex(std::make_pair(Real, Imag));3160}3161case Builtin::BI__builtin_creal:3162case 
Builtin::BI__builtin_crealf:3163case Builtin::BI__builtin_creall:3164case Builtin::BIcreal:3165case Builtin::BIcrealf:3166case Builtin::BIcreall: {3167ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));3168return RValue::get(ComplexVal.first);3169}31703171case Builtin::BI__builtin_preserve_access_index: {3172// Only enabled preserved access index region when debuginfo3173// is available as debuginfo is needed to preserve user-level3174// access pattern.3175if (!getDebugInfo()) {3176CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");3177return RValue::get(EmitScalarExpr(E->getArg(0)));3178}31793180// Nested builtin_preserve_access_index() not supported3181if (IsInPreservedAIRegion) {3182CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");3183return RValue::get(EmitScalarExpr(E->getArg(0)));3184}31853186IsInPreservedAIRegion = true;3187Value *Res = EmitScalarExpr(E->getArg(0));3188IsInPreservedAIRegion = false;3189return RValue::get(Res);3190}31913192case Builtin::BI__builtin_cimag:3193case Builtin::BI__builtin_cimagf:3194case Builtin::BI__builtin_cimagl:3195case Builtin::BIcimag:3196case Builtin::BIcimagf:3197case Builtin::BIcimagl: {3198ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));3199return RValue::get(ComplexVal.second);3200}32013202case Builtin::BI__builtin_clrsb:3203case Builtin::BI__builtin_clrsbl:3204case Builtin::BI__builtin_clrsbll: {3205// clrsb(x) -> clz(x < 0 ? 
~x : x) - 1 or3206Value *ArgValue = EmitScalarExpr(E->getArg(0));32073208llvm::Type *ArgType = ArgValue->getType();3209Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);32103211llvm::Type *ResultType = ConvertType(E->getType());3212Value *Zero = llvm::Constant::getNullValue(ArgType);3213Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");3214Value *Inverse = Builder.CreateNot(ArgValue, "not");3215Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);3216Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});3217Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));3218Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,3219"cast");3220return RValue::get(Result);3221}3222case Builtin::BI__builtin_ctzs:3223case Builtin::BI__builtin_ctz:3224case Builtin::BI__builtin_ctzl:3225case Builtin::BI__builtin_ctzll:3226case Builtin::BI__builtin_ctzg: {3227bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg &&3228E->getNumArgs() > 1;32293230Value *ArgValue =3231HasFallback ? 
EmitScalarExpr(E->getArg(0))3232: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);32333234llvm::Type *ArgType = ArgValue->getType();3235Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);32363237llvm::Type *ResultType = ConvertType(E->getType());3238Value *ZeroUndef =3239Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());3240Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});3241if (Result->getType() != ResultType)3242Result =3243Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");3244if (!HasFallback)3245return RValue::get(Result);32463247Value *Zero = Constant::getNullValue(ArgType);3248Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");3249Value *FallbackValue = EmitScalarExpr(E->getArg(1));3250Value *ResultOrFallback =3251Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg");3252return RValue::get(ResultOrFallback);3253}3254case Builtin::BI__builtin_clzs:3255case Builtin::BI__builtin_clz:3256case Builtin::BI__builtin_clzl:3257case Builtin::BI__builtin_clzll:3258case Builtin::BI__builtin_clzg: {3259bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg &&3260E->getNumArgs() > 1;32613262Value *ArgValue =3263HasFallback ? 
EmitScalarExpr(E->getArg(0))3264: EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);32653266llvm::Type *ArgType = ArgValue->getType();3267Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);32683269llvm::Type *ResultType = ConvertType(E->getType());3270Value *ZeroUndef =3271Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef());3272Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});3273if (Result->getType() != ResultType)3274Result =3275Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");3276if (!HasFallback)3277return RValue::get(Result);32783279Value *Zero = Constant::getNullValue(ArgType);3280Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");3281Value *FallbackValue = EmitScalarExpr(E->getArg(1));3282Value *ResultOrFallback =3283Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg");3284return RValue::get(ResultOrFallback);3285}3286case Builtin::BI__builtin_ffs:3287case Builtin::BI__builtin_ffsl:3288case Builtin::BI__builtin_ffsll: {3289// ffs(x) -> x ? 
cttz(x) + 1 : 03290Value *ArgValue = EmitScalarExpr(E->getArg(0));32913292llvm::Type *ArgType = ArgValue->getType();3293Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);32943295llvm::Type *ResultType = ConvertType(E->getType());3296Value *Tmp =3297Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),3298llvm::ConstantInt::get(ArgType, 1));3299Value *Zero = llvm::Constant::getNullValue(ArgType);3300Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");3301Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");3302if (Result->getType() != ResultType)3303Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,3304"cast");3305return RValue::get(Result);3306}3307case Builtin::BI__builtin_parity:3308case Builtin::BI__builtin_parityl:3309case Builtin::BI__builtin_parityll: {3310// parity(x) -> ctpop(x) & 13311Value *ArgValue = EmitScalarExpr(E->getArg(0));33123313llvm::Type *ArgType = ArgValue->getType();3314Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);33153316llvm::Type *ResultType = ConvertType(E->getType());3317Value *Tmp = Builder.CreateCall(F, ArgValue);3318Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));3319if (Result->getType() != ResultType)3320Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,3321"cast");3322return RValue::get(Result);3323}3324case Builtin::BI__lzcnt16:3325case Builtin::BI__lzcnt:3326case Builtin::BI__lzcnt64: {3327Value *ArgValue = EmitScalarExpr(E->getArg(0));33283329llvm::Type *ArgType = ArgValue->getType();3330Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);33313332llvm::Type *ResultType = ConvertType(E->getType());3333Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});3334if (Result->getType() != ResultType)3335Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,3336"cast");3337return RValue::get(Result);3338}3339case Builtin::BI__popcnt16:3340case Builtin::BI__popcnt:3341case 
Builtin::BI__popcnt64:3342case Builtin::BI__builtin_popcount:3343case Builtin::BI__builtin_popcountl:3344case Builtin::BI__builtin_popcountll:3345case Builtin::BI__builtin_popcountg: {3346Value *ArgValue = EmitScalarExpr(E->getArg(0));33473348llvm::Type *ArgType = ArgValue->getType();3349Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);33503351llvm::Type *ResultType = ConvertType(E->getType());3352Value *Result = Builder.CreateCall(F, ArgValue);3353if (Result->getType() != ResultType)3354Result =3355Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast");3356return RValue::get(Result);3357}3358case Builtin::BI__builtin_unpredictable: {3359// Always return the argument of __builtin_unpredictable. LLVM does not3360// handle this builtin. Metadata for this builtin should be added directly3361// to instructions such as branches or switches that use it.3362return RValue::get(EmitScalarExpr(E->getArg(0)));3363}3364case Builtin::BI__builtin_expect: {3365Value *ArgValue = EmitScalarExpr(E->getArg(0));3366llvm::Type *ArgType = ArgValue->getType();33673368Value *ExpectedValue = EmitScalarExpr(E->getArg(1));3369// Don't generate llvm.expect on -O0 as the backend won't use it for3370// anything.3371// Note, we still IRGen ExpectedValue because it could have side-effects.3372if (CGM.getCodeGenOpts().OptimizationLevel == 0)3373return RValue::get(ArgValue);33743375Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);3376Value *Result =3377Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");3378return RValue::get(Result);3379}3380case Builtin::BI__builtin_expect_with_probability: {3381Value *ArgValue = EmitScalarExpr(E->getArg(0));3382llvm::Type *ArgType = ArgValue->getType();33833384Value *ExpectedValue = EmitScalarExpr(E->getArg(1));3385llvm::APFloat Probability(0.0);3386const Expr *ProbArg = E->getArg(2);3387bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext());3388assert(EvalSucceed && "probability should 
be able to evaluate as float");3389(void)EvalSucceed;3390bool LoseInfo = false;3391Probability.convert(llvm::APFloat::IEEEdouble(),3392llvm::RoundingMode::Dynamic, &LoseInfo);3393llvm::Type *Ty = ConvertType(ProbArg->getType());3394Constant *Confidence = ConstantFP::get(Ty, Probability);3395// Don't generate llvm.expect.with.probability on -O0 as the backend3396// won't use it for anything.3397// Note, we still IRGen ExpectedValue because it could have side-effects.3398if (CGM.getCodeGenOpts().OptimizationLevel == 0)3399return RValue::get(ArgValue);34003401Function *FnExpect =3402CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType);3403Value *Result = Builder.CreateCall(3404FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval");3405return RValue::get(Result);3406}3407case Builtin::BI__builtin_assume_aligned: {3408const Expr *Ptr = E->getArg(0);3409Value *PtrValue = EmitScalarExpr(Ptr);3410Value *OffsetValue =3411(E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;34123413Value *AlignmentValue = EmitScalarExpr(E->getArg(1));3414ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);3415if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))3416AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),3417llvm::Value::MaximumAlignment);34183419emitAlignmentAssumption(PtrValue, Ptr,3420/*The expr loc is sufficient.*/ SourceLocation(),3421AlignmentCI, OffsetValue);3422return RValue::get(PtrValue);3423}3424case Builtin::BI__assume:3425case Builtin::BI__builtin_assume: {3426if (E->getArg(0)->HasSideEffects(getContext()))3427return RValue::get(nullptr);34283429Value *ArgValue = EmitScalarExpr(E->getArg(0));3430Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);3431Builder.CreateCall(FnAssume, ArgValue);3432return RValue::get(nullptr);3433}3434case Builtin::BI__builtin_assume_separate_storage: {3435const Expr *Arg0 = E->getArg(0);3436const Expr *Arg1 = E->getArg(1);34373438Value *Value0 = EmitScalarExpr(Arg0);3439Value *Value1 
= EmitScalarExpr(Arg1);34403441Value *Values[] = {Value0, Value1};3442OperandBundleDefT<Value *> OBD("separate_storage", Values);3443Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});3444return RValue::get(nullptr);3445}3446case Builtin::BI__builtin_allow_runtime_check: {3447StringRef Kind =3448cast<StringLiteral>(E->getArg(0)->IgnoreParenCasts())->getString();3449LLVMContext &Ctx = CGM.getLLVMContext();3450llvm::Value *Allow = Builder.CreateCall(3451CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check),3452llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind)));3453return RValue::get(Allow);3454}3455case Builtin::BI__arithmetic_fence: {3456// Create the builtin call if FastMath is selected, and the target3457// supports the builtin, otherwise just return the argument.3458CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3459llvm::FastMathFlags FMF = Builder.getFastMathFlags();3460bool isArithmeticFenceEnabled =3461FMF.allowReassoc() &&3462getContext().getTargetInfo().checkArithmeticFenceSupported();3463QualType ArgType = E->getArg(0)->getType();3464if (ArgType->isComplexType()) {3465if (isArithmeticFenceEnabled) {3466QualType ElementType = ArgType->castAs<ComplexType>()->getElementType();3467ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));3468Value *Real = Builder.CreateArithmeticFence(ComplexVal.first,3469ConvertType(ElementType));3470Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second,3471ConvertType(ElementType));3472return RValue::getComplex(std::make_pair(Real, Imag));3473}3474ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));3475Value *Real = ComplexVal.first;3476Value *Imag = ComplexVal.second;3477return RValue::getComplex(std::make_pair(Real, Imag));3478}3479Value *ArgValue = EmitScalarExpr(E->getArg(0));3480if (isArithmeticFenceEnabled)3481return RValue::get(3482Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType)));3483return RValue::get(ArgValue);3484}3485case 
Builtin::BI__builtin_bswap16:3486case Builtin::BI__builtin_bswap32:3487case Builtin::BI__builtin_bswap64:3488case Builtin::BI_byteswap_ushort:3489case Builtin::BI_byteswap_ulong:3490case Builtin::BI_byteswap_uint64: {3491return RValue::get(3492emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap));3493}3494case Builtin::BI__builtin_bitreverse8:3495case Builtin::BI__builtin_bitreverse16:3496case Builtin::BI__builtin_bitreverse32:3497case Builtin::BI__builtin_bitreverse64: {3498return RValue::get(3499emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse));3500}3501case Builtin::BI__builtin_rotateleft8:3502case Builtin::BI__builtin_rotateleft16:3503case Builtin::BI__builtin_rotateleft32:3504case Builtin::BI__builtin_rotateleft64:3505case Builtin::BI_rotl8: // Microsoft variants of rotate left3506case Builtin::BI_rotl16:3507case Builtin::BI_rotl:3508case Builtin::BI_lrotl:3509case Builtin::BI_rotl64:3510return emitRotate(E, false);35113512case Builtin::BI__builtin_rotateright8:3513case Builtin::BI__builtin_rotateright16:3514case Builtin::BI__builtin_rotateright32:3515case Builtin::BI__builtin_rotateright64:3516case Builtin::BI_rotr8: // Microsoft variants of rotate right3517case Builtin::BI_rotr16:3518case Builtin::BI_rotr:3519case Builtin::BI_lrotr:3520case Builtin::BI_rotr64:3521return emitRotate(E, true);35223523case Builtin::BI__builtin_constant_p: {3524llvm::Type *ResultType = ConvertType(E->getType());35253526const Expr *Arg = E->getArg(0);3527QualType ArgType = Arg->getType();3528// FIXME: The allowance for Obj-C pointers and block pointers is historical3529// and likely a mistake.3530if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&3531!ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())3532// Per the GCC documentation, only numeric constants are recognized after3533// inlining.3534return RValue::get(ConstantInt::get(ResultType, 0));35353536if (Arg->HasSideEffects(getContext()))3537// The 
argument is unevaluated, so be conservative if it might have3538// side-effects.3539return RValue::get(ConstantInt::get(ResultType, 0));35403541Value *ArgValue = EmitScalarExpr(Arg);3542if (ArgType->isObjCObjectPointerType()) {3543// Convert Objective-C objects to id because we cannot distinguish between3544// LLVM types for Obj-C classes as they are opaque.3545ArgType = CGM.getContext().getObjCIdType();3546ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));3547}3548Function *F =3549CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));3550Value *Result = Builder.CreateCall(F, ArgValue);3551if (Result->getType() != ResultType)3552Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);3553return RValue::get(Result);3554}3555case Builtin::BI__builtin_dynamic_object_size:3556case Builtin::BI__builtin_object_size: {3557unsigned Type =3558E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();3559auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));35603561// We pass this builtin onto the optimizer so that it can figure out the3562// object size in more complex cases.3563bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;3564return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,3565/*EmittedE=*/nullptr, IsDynamic));3566}3567case Builtin::BI__builtin_prefetch: {3568Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));3569// FIXME: Technically these constants should of type 'int', yes?3570RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :3571llvm::ConstantInt::get(Int32Ty, 0);3572Locality = (E->getNumArgs() > 2) ? 
EmitScalarExpr(E->getArg(2)) :3573llvm::ConstantInt::get(Int32Ty, 3);3574Value *Data = llvm::ConstantInt::get(Int32Ty, 1);3575Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());3576Builder.CreateCall(F, {Address, RW, Locality, Data});3577return RValue::get(nullptr);3578}3579case Builtin::BI__builtin_readcyclecounter: {3580Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);3581return RValue::get(Builder.CreateCall(F));3582}3583case Builtin::BI__builtin_readsteadycounter: {3584Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);3585return RValue::get(Builder.CreateCall(F));3586}3587case Builtin::BI__builtin___clear_cache: {3588Value *Begin = EmitScalarExpr(E->getArg(0));3589Value *End = EmitScalarExpr(E->getArg(1));3590Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);3591return RValue::get(Builder.CreateCall(F, {Begin, End}));3592}3593case Builtin::BI__builtin_trap:3594EmitTrapCall(Intrinsic::trap);3595return RValue::get(nullptr);3596case Builtin::BI__builtin_verbose_trap: {3597llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();3598if (getDebugInfo()) {3599TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(3600TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),3601*E->getArg(1)->tryEvaluateString(getContext()));3602}3603ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);3604// Currently no attempt is made to prevent traps from being merged.3605EmitTrapCall(Intrinsic::trap);3606return RValue::get(nullptr);3607}3608case Builtin::BI__debugbreak:3609EmitTrapCall(Intrinsic::debugtrap);3610return RValue::get(nullptr);3611case Builtin::BI__builtin_unreachable: {3612EmitUnreachable(E->getExprLoc());36133614// We do need to preserve an insertion point.3615EmitBlock(createBasicBlock("unreachable.cont"));36163617return RValue::get(nullptr);3618}36193620case Builtin::BI__builtin_powi:3621case Builtin::BI__builtin_powif:3622case Builtin::BI__builtin_powil: {3623llvm::Value *Src0 = 
EmitScalarExpr(E->getArg(0));3624llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));36253626if (Builder.getIsFPConstrained()) {3627// FIXME: llvm.powi has 2 mangling types,3628// llvm.experimental.constrained.powi has one.3629CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3630Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi,3631Src0->getType());3632return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 }));3633}36343635Function *F = CGM.getIntrinsic(Intrinsic::powi,3636{ Src0->getType(), Src1->getType() });3637return RValue::get(Builder.CreateCall(F, { Src0, Src1 }));3638}3639case Builtin::BI__builtin_frexpl: {3640// Linux PPC will not be adding additional PPCDoubleDouble support.3641// WIP to switch default to IEEE long double. Will emit libcall for3642// frexpl instead of legalizing this type in the BE.3643if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble())3644break;3645[[fallthrough]];3646}3647case Builtin::BI__builtin_frexp:3648case Builtin::BI__builtin_frexpf:3649case Builtin::BI__builtin_frexpf128:3650case Builtin::BI__builtin_frexpf16:3651return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp));3652case Builtin::BI__builtin_isgreater:3653case Builtin::BI__builtin_isgreaterequal:3654case Builtin::BI__builtin_isless:3655case Builtin::BI__builtin_islessequal:3656case Builtin::BI__builtin_islessgreater:3657case Builtin::BI__builtin_isunordered: {3658// Ordered comparisons: we know the arguments to these are matching scalar3659// floating point values.3660CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3661Value *LHS = EmitScalarExpr(E->getArg(0));3662Value *RHS = EmitScalarExpr(E->getArg(1));36633664switch (BuiltinID) {3665default: llvm_unreachable("Unknown ordered comparison");3666case Builtin::BI__builtin_isgreater:3667LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");3668break;3669case Builtin::BI__builtin_isgreaterequal:3670LHS = Builder.CreateFCmpOGE(LHS, RHS, 
"cmp");3671break;3672case Builtin::BI__builtin_isless:3673LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");3674break;3675case Builtin::BI__builtin_islessequal:3676LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");3677break;3678case Builtin::BI__builtin_islessgreater:3679LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");3680break;3681case Builtin::BI__builtin_isunordered:3682LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");3683break;3684}3685// ZExt bool to int type.3686return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));3687}36883689case Builtin::BI__builtin_isnan: {3690CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3691Value *V = EmitScalarExpr(E->getArg(0));3692if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))3693return RValue::get(Result);3694return RValue::get(3695Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan),3696ConvertType(E->getType())));3697}36983699case Builtin::BI__builtin_issignaling: {3700CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3701Value *V = EmitScalarExpr(E->getArg(0));3702return RValue::get(3703Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan),3704ConvertType(E->getType())));3705}37063707case Builtin::BI__builtin_isinf: {3708CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3709Value *V = EmitScalarExpr(E->getArg(0));3710if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))3711return RValue::get(Result);3712return RValue::get(3713Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf),3714ConvertType(E->getType())));3715}37163717case Builtin::BIfinite:3718case Builtin::BI__finite:3719case Builtin::BIfinitef:3720case Builtin::BI__finitef:3721case Builtin::BIfinitel:3722case Builtin::BI__finitel:3723case Builtin::BI__builtin_isfinite: {3724CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3725Value *V = EmitScalarExpr(E->getArg(0));3726if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V))3727return RValue::get(Result);3728return 
RValue::get(3729Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite),3730ConvertType(E->getType())));3731}37323733case Builtin::BI__builtin_isnormal: {3734CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3735Value *V = EmitScalarExpr(E->getArg(0));3736return RValue::get(3737Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal),3738ConvertType(E->getType())));3739}37403741case Builtin::BI__builtin_issubnormal: {3742CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3743Value *V = EmitScalarExpr(E->getArg(0));3744return RValue::get(3745Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal),3746ConvertType(E->getType())));3747}37483749case Builtin::BI__builtin_iszero: {3750CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3751Value *V = EmitScalarExpr(E->getArg(0));3752return RValue::get(3753Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero),3754ConvertType(E->getType())));3755}37563757case Builtin::BI__builtin_isfpclass: {3758Expr::EvalResult Result;3759if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext()))3760break;3761uint64_t Test = Result.Val.getInt().getLimitedValue();3762CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);3763Value *V = EmitScalarExpr(E->getArg(0));3764return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test),3765ConvertType(E->getType())));3766}37673768case Builtin::BI__builtin_nondeterministic_value: {3769llvm::Type *Ty = ConvertType(E->getArg(0)->getType());37703771Value *Result = PoisonValue::get(Ty);3772Result = Builder.CreateFreeze(Result);37733774return RValue::get(Result);3775}37763777case Builtin::BI__builtin_elementwise_abs: {3778Value *Result;3779QualType QT = E->getArg(0)->getType();37803781if (auto *VecTy = QT->getAs<VectorType>())3782QT = VecTy->getElementType();3783if (QT->isIntegerType())3784Result = Builder.CreateBinaryIntrinsic(3785llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)),3786Builder.getFalse(), nullptr, 
"elt.abs");3787else3788Result = emitBuiltinWithOneOverloadedType<1>(3789*this, E, llvm::Intrinsic::fabs, "elt.abs");37903791return RValue::get(Result);3792}3793case Builtin::BI__builtin_elementwise_acos:3794return RValue::get(emitBuiltinWithOneOverloadedType<1>(3795*this, E, llvm::Intrinsic::acos, "elt.acos"));3796case Builtin::BI__builtin_elementwise_asin:3797return RValue::get(emitBuiltinWithOneOverloadedType<1>(3798*this, E, llvm::Intrinsic::asin, "elt.asin"));3799case Builtin::BI__builtin_elementwise_atan:3800return RValue::get(emitBuiltinWithOneOverloadedType<1>(3801*this, E, llvm::Intrinsic::atan, "elt.atan"));3802case Builtin::BI__builtin_elementwise_ceil:3803return RValue::get(emitBuiltinWithOneOverloadedType<1>(3804*this, E, llvm::Intrinsic::ceil, "elt.ceil"));3805case Builtin::BI__builtin_elementwise_exp:3806return RValue::get(emitBuiltinWithOneOverloadedType<1>(3807*this, E, llvm::Intrinsic::exp, "elt.exp"));3808case Builtin::BI__builtin_elementwise_exp2:3809return RValue::get(emitBuiltinWithOneOverloadedType<1>(3810*this, E, llvm::Intrinsic::exp2, "elt.exp2"));3811case Builtin::BI__builtin_elementwise_log:3812return RValue::get(emitBuiltinWithOneOverloadedType<1>(3813*this, E, llvm::Intrinsic::log, "elt.log"));3814case Builtin::BI__builtin_elementwise_log2:3815return RValue::get(emitBuiltinWithOneOverloadedType<1>(3816*this, E, llvm::Intrinsic::log2, "elt.log2"));3817case Builtin::BI__builtin_elementwise_log10:3818return RValue::get(emitBuiltinWithOneOverloadedType<1>(3819*this, E, llvm::Intrinsic::log10, "elt.log10"));3820case Builtin::BI__builtin_elementwise_pow: {3821return RValue::get(3822emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow));3823}3824case Builtin::BI__builtin_elementwise_bitreverse:3825return RValue::get(emitBuiltinWithOneOverloadedType<1>(3826*this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse"));3827case Builtin::BI__builtin_elementwise_cos:3828return RValue::get(emitBuiltinWithOneOverloadedType<1>(3829*this, E, 
llvm::Intrinsic::cos, "elt.cos"));3830case Builtin::BI__builtin_elementwise_cosh:3831return RValue::get(emitBuiltinWithOneOverloadedType<1>(3832*this, E, llvm::Intrinsic::cosh, "elt.cosh"));3833case Builtin::BI__builtin_elementwise_floor:3834return RValue::get(emitBuiltinWithOneOverloadedType<1>(3835*this, E, llvm::Intrinsic::floor, "elt.floor"));3836case Builtin::BI__builtin_elementwise_roundeven:3837return RValue::get(emitBuiltinWithOneOverloadedType<1>(3838*this, E, llvm::Intrinsic::roundeven, "elt.roundeven"));3839case Builtin::BI__builtin_elementwise_round:3840return RValue::get(emitBuiltinWithOneOverloadedType<1>(3841*this, E, llvm::Intrinsic::round, "elt.round"));3842case Builtin::BI__builtin_elementwise_rint:3843return RValue::get(emitBuiltinWithOneOverloadedType<1>(3844*this, E, llvm::Intrinsic::rint, "elt.rint"));3845case Builtin::BI__builtin_elementwise_nearbyint:3846return RValue::get(emitBuiltinWithOneOverloadedType<1>(3847*this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint"));3848case Builtin::BI__builtin_elementwise_sin:3849return RValue::get(emitBuiltinWithOneOverloadedType<1>(3850*this, E, llvm::Intrinsic::sin, "elt.sin"));3851case Builtin::BI__builtin_elementwise_sinh:3852return RValue::get(emitBuiltinWithOneOverloadedType<1>(3853*this, E, llvm::Intrinsic::sinh, "elt.sinh"));3854case Builtin::BI__builtin_elementwise_tan:3855return RValue::get(emitBuiltinWithOneOverloadedType<1>(3856*this, E, llvm::Intrinsic::tan, "elt.tan"));3857case Builtin::BI__builtin_elementwise_tanh:3858return RValue::get(emitBuiltinWithOneOverloadedType<1>(3859*this, E, llvm::Intrinsic::tanh, "elt.tanh"));3860case Builtin::BI__builtin_elementwise_trunc:3861return RValue::get(emitBuiltinWithOneOverloadedType<1>(3862*this, E, llvm::Intrinsic::trunc, "elt.trunc"));3863case Builtin::BI__builtin_elementwise_canonicalize:3864return RValue::get(emitBuiltinWithOneOverloadedType<1>(3865*this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize"));3866case 
Builtin::BI__builtin_elementwise_copysign:3867return RValue::get(emitBuiltinWithOneOverloadedType<2>(3868*this, E, llvm::Intrinsic::copysign));3869case Builtin::BI__builtin_elementwise_fma:3870return RValue::get(3871emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma));3872case Builtin::BI__builtin_elementwise_add_sat:3873case Builtin::BI__builtin_elementwise_sub_sat: {3874Value *Op0 = EmitScalarExpr(E->getArg(0));3875Value *Op1 = EmitScalarExpr(E->getArg(1));3876Value *Result;3877assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected");3878QualType Ty = E->getArg(0)->getType();3879if (auto *VecTy = Ty->getAs<VectorType>())3880Ty = VecTy->getElementType();3881bool IsSigned = Ty->isSignedIntegerType();3882unsigned Opc;3883if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat)3884Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat;3885else3886Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat;3887Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat");3888return RValue::get(Result);3889}38903891case Builtin::BI__builtin_elementwise_max: {3892Value *Op0 = EmitScalarExpr(E->getArg(0));3893Value *Op1 = EmitScalarExpr(E->getArg(1));3894Value *Result;3895if (Op0->getType()->isIntOrIntVectorTy()) {3896QualType Ty = E->getArg(0)->getType();3897if (auto *VecTy = Ty->getAs<VectorType>())3898Ty = VecTy->getElementType();3899Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()3900? 
llvm::Intrinsic::smax3901: llvm::Intrinsic::umax,3902Op0, Op1, nullptr, "elt.max");3903} else3904Result = Builder.CreateMaxNum(Op0, Op1, "elt.max");3905return RValue::get(Result);3906}3907case Builtin::BI__builtin_elementwise_min: {3908Value *Op0 = EmitScalarExpr(E->getArg(0));3909Value *Op1 = EmitScalarExpr(E->getArg(1));3910Value *Result;3911if (Op0->getType()->isIntOrIntVectorTy()) {3912QualType Ty = E->getArg(0)->getType();3913if (auto *VecTy = Ty->getAs<VectorType>())3914Ty = VecTy->getElementType();3915Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType()3916? llvm::Intrinsic::smin3917: llvm::Intrinsic::umin,3918Op0, Op1, nullptr, "elt.min");3919} else3920Result = Builder.CreateMinNum(Op0, Op1, "elt.min");3921return RValue::get(Result);3922}39233924case Builtin::BI__builtin_reduce_max: {3925auto GetIntrinsicID = [this](QualType QT) {3926if (auto *VecTy = QT->getAs<VectorType>())3927QT = VecTy->getElementType();3928else if (QT->isSizelessVectorType())3929QT = QT->getSizelessVectorEltType(CGM.getContext());39303931if (QT->isSignedIntegerType())3932return llvm::Intrinsic::vector_reduce_smax;3933if (QT->isUnsignedIntegerType())3934return llvm::Intrinsic::vector_reduce_umax;3935assert(QT->isFloatingType() && "must have a float here");3936return llvm::Intrinsic::vector_reduce_fmax;3937};3938return RValue::get(emitBuiltinWithOneOverloadedType<1>(3939*this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));3940}39413942case Builtin::BI__builtin_reduce_min: {3943auto GetIntrinsicID = [this](QualType QT) {3944if (auto *VecTy = QT->getAs<VectorType>())3945QT = VecTy->getElementType();3946else if (QT->isSizelessVectorType())3947QT = QT->getSizelessVectorEltType(CGM.getContext());39483949if (QT->isSignedIntegerType())3950return llvm::Intrinsic::vector_reduce_smin;3951if (QT->isUnsignedIntegerType())3952return llvm::Intrinsic::vector_reduce_umin;3953assert(QT->isFloatingType() && "must have a float here");3954return 
llvm::Intrinsic::vector_reduce_fmin;3955};39563957return RValue::get(emitBuiltinWithOneOverloadedType<1>(3958*this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min"));3959}39603961case Builtin::BI__builtin_reduce_add:3962return RValue::get(emitBuiltinWithOneOverloadedType<1>(3963*this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add"));3964case Builtin::BI__builtin_reduce_mul:3965return RValue::get(emitBuiltinWithOneOverloadedType<1>(3966*this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul"));3967case Builtin::BI__builtin_reduce_xor:3968return RValue::get(emitBuiltinWithOneOverloadedType<1>(3969*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));3970case Builtin::BI__builtin_reduce_or:3971return RValue::get(emitBuiltinWithOneOverloadedType<1>(3972*this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));3973case Builtin::BI__builtin_reduce_and:3974return RValue::get(emitBuiltinWithOneOverloadedType<1>(3975*this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));39763977case Builtin::BI__builtin_matrix_transpose: {3978auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();3979Value *MatValue = EmitScalarExpr(E->getArg(0));3980MatrixBuilder MB(Builder);3981Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(),3982MatrixTy->getNumColumns());3983return RValue::get(Result);3984}39853986case Builtin::BI__builtin_matrix_column_major_load: {3987MatrixBuilder MB(Builder);3988// Emit everything that isn't dependent on the first parameter type3989Value *Stride = EmitScalarExpr(E->getArg(3));3990const auto *ResultTy = E->getType()->getAs<ConstantMatrixType>();3991auto *PtrTy = E->getArg(0)->getType()->getAs<PointerType>();3992assert(PtrTy && "arg0 must be of pointer type");3993bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();39943995Address Src = EmitPointerWithAlignment(E->getArg(0));3996EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),3997E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), 
FD,39980);3999Value *Result = MB.CreateColumnMajorLoad(4000Src.getElementType(), Src.emitRawPointer(*this),4001Align(Src.getAlignment().getQuantity()), Stride, IsVolatile,4002ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix");4003return RValue::get(Result);4004}40054006case Builtin::BI__builtin_matrix_column_major_store: {4007MatrixBuilder MB(Builder);4008Value *Matrix = EmitScalarExpr(E->getArg(0));4009Address Dst = EmitPointerWithAlignment(E->getArg(1));4010Value *Stride = EmitScalarExpr(E->getArg(2));40114012const auto *MatrixTy = E->getArg(0)->getType()->getAs<ConstantMatrixType>();4013auto *PtrTy = E->getArg(1)->getType()->getAs<PointerType>();4014assert(PtrTy && "arg1 must be of pointer type");4015bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified();40164017EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)),4018E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,40190);4020Value *Result = MB.CreateColumnMajorStore(4021Matrix, Dst.emitRawPointer(*this),4022Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile,4023MatrixTy->getNumRows(), MatrixTy->getNumColumns());4024return RValue::get(Result);4025}40264027case Builtin::BI__builtin_isinf_sign: {4028// isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 04029CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);4030// FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.4031Value *Arg = EmitScalarExpr(E->getArg(0));4032Value *AbsArg = EmitFAbs(*this, Arg);4033Value *IsInf = Builder.CreateFCmpOEQ(4034AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");4035Value *IsNeg = EmitSignBit(*this, Arg);40364037llvm::Type *IntTy = ConvertType(E->getType());4038Value *Zero = Constant::getNullValue(IntTy);4039Value *One = ConstantInt::get(IntTy, 1);4040Value *NegativeOne = ConstantInt::get(IntTy, -1);4041Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);4042Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);4043return RValue::get(Result);4044}40454046case Builtin::BI__builtin_flt_rounds: {4047Function *F = CGM.getIntrinsic(Intrinsic::get_rounding);40484049llvm::Type *ResultType = ConvertType(E->getType());4050Value *Result = Builder.CreateCall(F);4051if (Result->getType() != ResultType)4052Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,4053"cast");4054return RValue::get(Result);4055}40564057case Builtin::BI__builtin_set_flt_rounds: {4058Function *F = CGM.getIntrinsic(Intrinsic::set_rounding);40594060Value *V = EmitScalarExpr(E->getArg(0));4061Builder.CreateCall(F, V);4062return RValue::get(nullptr);4063}40644065case Builtin::BI__builtin_fpclassify: {4066CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);4067// FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here.4068Value *V = EmitScalarExpr(E->getArg(5));4069llvm::Type *Ty = ConvertType(E->getArg(5)->getType());40704071// Create Result4072BasicBlock *Begin = Builder.GetInsertBlock();4073BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);4074Builder.SetInsertPoint(End);4075PHINode *Result =4076Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,4077"fpclassify_result");40784079// if (V==0) return FP_ZERO4080Builder.SetInsertPoint(Begin);4081Value *IsZero = 
Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),4082"iszero");4083Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));4084BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);4085Builder.CreateCondBr(IsZero, End, NotZero);4086Result->addIncoming(ZeroLiteral, Begin);40874088// if (V != V) return FP_NAN4089Builder.SetInsertPoint(NotZero);4090Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");4091Value *NanLiteral = EmitScalarExpr(E->getArg(0));4092BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);4093Builder.CreateCondBr(IsNan, End, NotNan);4094Result->addIncoming(NanLiteral, NotZero);40954096// if (fabs(V) == infinity) return FP_INFINITY4097Builder.SetInsertPoint(NotNan);4098Value *VAbs = EmitFAbs(*this, V);4099Value *IsInf =4100Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),4101"isinf");4102Value *InfLiteral = EmitScalarExpr(E->getArg(1));4103BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);4104Builder.CreateCondBr(IsInf, End, NotInf);4105Result->addIncoming(InfLiteral, NotNan);41064107// if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL4108Builder.SetInsertPoint(NotInf);4109APFloat Smallest = APFloat::getSmallestNormalized(4110getContext().getFloatTypeSemantics(E->getArg(5)->getType()));4111Value *IsNormal =4112Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),4113"isnormal");4114Value *NormalResult =4115Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),4116EmitScalarExpr(E->getArg(3)));4117Builder.CreateBr(End);4118Result->addIncoming(NormalResult, NotInf);41194120// return Result4121Builder.SetInsertPoint(End);4122return RValue::get(Result);4123}41244125// An alloca will always return a pointer to the alloca (stack) address4126// space. This address space need not be the same as the AST / Language4127// default (e.g. in C / C++ auto vars are in the generic address space). 
At4128// the AST level this is handled within CreateTempAlloca et al., but for the4129// builtin / dynamic alloca we have to handle it here. We use an explicit cast4130// instead of passing an AS to CreateAlloca so as to not inhibit optimisation.4131case Builtin::BIalloca:4132case Builtin::BI_alloca:4133case Builtin::BI__builtin_alloca_uninitialized:4134case Builtin::BI__builtin_alloca: {4135Value *Size = EmitScalarExpr(E->getArg(0));4136const TargetInfo &TI = getContext().getTargetInfo();4137// The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.4138const Align SuitableAlignmentInBytes =4139CGM.getContext()4140.toCharUnitsFromBits(TI.getSuitableAlign())4141.getAsAlign();4142AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);4143AI->setAlignment(SuitableAlignmentInBytes);4144if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized)4145initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);4146LangAS AAS = getASTAllocaAddressSpace();4147LangAS EAS = E->getType()->getPointeeType().getAddressSpace();4148if (AAS != EAS) {4149llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());4150return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,4151EAS, Ty));4152}4153return RValue::get(AI);4154}41554156case Builtin::BI__builtin_alloca_with_align_uninitialized:4157case Builtin::BI__builtin_alloca_with_align: {4158Value *Size = EmitScalarExpr(E->getArg(0));4159Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));4160auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);4161unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();4162const Align AlignmentInBytes =4163CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign();4164AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);4165AI->setAlignment(AlignmentInBytes);4166if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized)4167initializeAlloca(*this, AI, Size, AlignmentInBytes);4168LangAS AAS = 
getASTAllocaAddressSpace();4169LangAS EAS = E->getType()->getPointeeType().getAddressSpace();4170if (AAS != EAS) {4171llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType());4172return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS,4173EAS, Ty));4174}4175return RValue::get(AI);4176}41774178case Builtin::BIbzero:4179case Builtin::BI__builtin_bzero: {4180Address Dest = EmitPointerWithAlignment(E->getArg(0));4181Value *SizeVal = EmitScalarExpr(E->getArg(1));4182EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),4183E->getArg(0)->getExprLoc(), FD, 0);4184Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);4185return RValue::get(nullptr);4186}41874188case Builtin::BIbcopy:4189case Builtin::BI__builtin_bcopy: {4190Address Src = EmitPointerWithAlignment(E->getArg(0));4191Address Dest = EmitPointerWithAlignment(E->getArg(1));4192Value *SizeVal = EmitScalarExpr(E->getArg(2));4193EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)),4194E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,41950);4196EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),4197E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD,41980);4199Builder.CreateMemMove(Dest, Src, SizeVal, false);4200return RValue::get(nullptr);4201}42024203case Builtin::BImemcpy:4204case Builtin::BI__builtin_memcpy:4205case Builtin::BImempcpy:4206case Builtin::BI__builtin_mempcpy: {4207Address Dest = EmitPointerWithAlignment(E->getArg(0));4208Address Src = EmitPointerWithAlignment(E->getArg(1));4209Value *SizeVal = EmitScalarExpr(E->getArg(2));4210EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);4211EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);4212Builder.CreateMemCpy(Dest, Src, SizeVal, false);4213if (BuiltinID == Builtin::BImempcpy ||4214BuiltinID == Builtin::BI__builtin_mempcpy)4215return RValue::get(Builder.CreateInBoundsGEP(4216Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal));4217else4218return RValue::get(Dest, *this);4219}42204221case 
Builtin::BI__builtin_memcpy_inline: {4222Address Dest = EmitPointerWithAlignment(E->getArg(0));4223Address Src = EmitPointerWithAlignment(E->getArg(1));4224uint64_t Size =4225E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();4226EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);4227EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);4228Builder.CreateMemCpyInline(Dest, Src, Size);4229return RValue::get(nullptr);4230}42314232case Builtin::BI__builtin_char_memchr:4233BuiltinID = Builtin::BI__builtin_memchr;4234break;42354236case Builtin::BI__builtin___memcpy_chk: {4237// fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.4238Expr::EvalResult SizeResult, DstSizeResult;4239if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||4240!E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))4241break;4242llvm::APSInt Size = SizeResult.Val.getInt();4243llvm::APSInt DstSize = DstSizeResult.Val.getInt();4244if (Size.ugt(DstSize))4245break;4246Address Dest = EmitPointerWithAlignment(E->getArg(0));4247Address Src = EmitPointerWithAlignment(E->getArg(1));4248Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);4249Builder.CreateMemCpy(Dest, Src, SizeVal, false);4250return RValue::get(Dest, *this);4251}42524253case Builtin::BI__builtin_objc_memmove_collectable: {4254Address DestAddr = EmitPointerWithAlignment(E->getArg(0));4255Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));4256Value *SizeVal = EmitScalarExpr(E->getArg(2));4257CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,4258DestAddr, SrcAddr, SizeVal);4259return RValue::get(DestAddr, *this);4260}42614262case Builtin::BI__builtin___memmove_chk: {4263// fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.4264Expr::EvalResult SizeResult, DstSizeResult;4265if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||4266!E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))4267break;4268llvm::APSInt Size = 
SizeResult.Val.getInt();4269llvm::APSInt DstSize = DstSizeResult.Val.getInt();4270if (Size.ugt(DstSize))4271break;4272Address Dest = EmitPointerWithAlignment(E->getArg(0));4273Address Src = EmitPointerWithAlignment(E->getArg(1));4274Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);4275Builder.CreateMemMove(Dest, Src, SizeVal, false);4276return RValue::get(Dest, *this);4277}42784279case Builtin::BImemmove:4280case Builtin::BI__builtin_memmove: {4281Address Dest = EmitPointerWithAlignment(E->getArg(0));4282Address Src = EmitPointerWithAlignment(E->getArg(1));4283Value *SizeVal = EmitScalarExpr(E->getArg(2));4284EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0);4285EmitArgCheck(TCK_Load, Src, E->getArg(1), 1);4286Builder.CreateMemMove(Dest, Src, SizeVal, false);4287return RValue::get(Dest, *this);4288}4289case Builtin::BImemset:4290case Builtin::BI__builtin_memset: {4291Address Dest = EmitPointerWithAlignment(E->getArg(0));4292Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),4293Builder.getInt8Ty());4294Value *SizeVal = EmitScalarExpr(E->getArg(2));4295EmitNonNullArgCheck(Dest, E->getArg(0)->getType(),4296E->getArg(0)->getExprLoc(), FD, 0);4297Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);4298return RValue::get(Dest, *this);4299}4300case Builtin::BI__builtin_memset_inline: {4301Address Dest = EmitPointerWithAlignment(E->getArg(0));4302Value *ByteVal =4303Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty());4304uint64_t Size =4305E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue();4306EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)),4307E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD,43080);4309Builder.CreateMemSetInline(Dest, ByteVal, Size);4310return RValue::get(nullptr);4311}4312case Builtin::BI__builtin___memset_chk: {4313// fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.4314Expr::EvalResult SizeResult, DstSizeResult;4315if 
(!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||4316!E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))4317break;4318llvm::APSInt Size = SizeResult.Val.getInt();4319llvm::APSInt DstSize = DstSizeResult.Val.getInt();4320if (Size.ugt(DstSize))4321break;4322Address Dest = EmitPointerWithAlignment(E->getArg(0));4323Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),4324Builder.getInt8Ty());4325Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);4326Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);4327return RValue::get(Dest, *this);4328}4329case Builtin::BI__builtin_wmemchr: {4330// The MSVC runtime library does not provide a definition of wmemchr, so we4331// need an inline implementation.4332if (!getTarget().getTriple().isOSMSVCRT())4333break;43344335llvm::Type *WCharTy = ConvertType(getContext().WCharTy);4336Value *Str = EmitScalarExpr(E->getArg(0));4337Value *Chr = EmitScalarExpr(E->getArg(1));4338Value *Size = EmitScalarExpr(E->getArg(2));43394340BasicBlock *Entry = Builder.GetInsertBlock();4341BasicBlock *CmpEq = createBasicBlock("wmemchr.eq");4342BasicBlock *Next = createBasicBlock("wmemchr.next");4343BasicBlock *Exit = createBasicBlock("wmemchr.exit");4344Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));4345Builder.CreateCondBr(SizeEq0, Exit, CmpEq);43464347EmitBlock(CmpEq);4348PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2);4349StrPhi->addIncoming(Str, Entry);4350PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);4351SizePhi->addIncoming(Size, Entry);4352CharUnits WCharAlign =4353getContext().getTypeAlignInChars(getContext().WCharTy);4354Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign);4355Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0);4356Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr);4357Builder.CreateCondBr(StrEqChr, Exit, Next);43584359EmitBlock(Next);4360Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, 
StrPhi, 1);4361Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));4362Value *NextSizeEq0 =4363Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));4364Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq);4365StrPhi->addIncoming(NextStr, Next);4366SizePhi->addIncoming(NextSize, Next);43674368EmitBlock(Exit);4369PHINode *Ret = Builder.CreatePHI(Str->getType(), 3);4370Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry);4371Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next);4372Ret->addIncoming(FoundChr, CmpEq);4373return RValue::get(Ret);4374}4375case Builtin::BI__builtin_wmemcmp: {4376// The MSVC runtime library does not provide a definition of wmemcmp, so we4377// need an inline implementation.4378if (!getTarget().getTriple().isOSMSVCRT())4379break;43804381llvm::Type *WCharTy = ConvertType(getContext().WCharTy);43824383Value *Dst = EmitScalarExpr(E->getArg(0));4384Value *Src = EmitScalarExpr(E->getArg(1));4385Value *Size = EmitScalarExpr(E->getArg(2));43864387BasicBlock *Entry = Builder.GetInsertBlock();4388BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");4389BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");4390BasicBlock *Next = createBasicBlock("wmemcmp.next");4391BasicBlock *Exit = createBasicBlock("wmemcmp.exit");4392Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));4393Builder.CreateCondBr(SizeEq0, Exit, CmpGT);43944395EmitBlock(CmpGT);4396PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);4397DstPhi->addIncoming(Dst, Entry);4398PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);4399SrcPhi->addIncoming(Src, Entry);4400PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);4401SizePhi->addIncoming(Size, Entry);4402CharUnits WCharAlign =4403getContext().getTypeAlignInChars(getContext().WCharTy);4404Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);4405Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);4406Value *DstGtSrc = 
Builder.CreateICmpUGT(DstCh, SrcCh);4407Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);44084409EmitBlock(CmpLT);4410Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);4411Builder.CreateCondBr(DstLtSrc, Exit, Next);44124413EmitBlock(Next);4414Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);4415Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);4416Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));4417Value *NextSizeEq0 =4418Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));4419Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);4420DstPhi->addIncoming(NextDst, Next);4421SrcPhi->addIncoming(NextSrc, Next);4422SizePhi->addIncoming(NextSize, Next);44234424EmitBlock(Exit);4425PHINode *Ret = Builder.CreatePHI(IntTy, 4);4426Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);4427Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);4428Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);4429Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);4430return RValue::get(Ret);4431}4432case Builtin::BI__builtin_dwarf_cfa: {4433// The offset in bytes from the first argument to the CFA.4434//4435// Why on earth is this in the frontend? 
Is there any reason at4436// all that the backend can't reasonably determine this while4437// lowering llvm.eh.dwarf.cfa()?4438//4439// TODO: If there's a satisfactory reason, add a target hook for4440// this instead of hard-coding 0, which is correct for most targets.4441int32_t Offset = 0;44424443Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);4444return RValue::get(Builder.CreateCall(F,4445llvm::ConstantInt::get(Int32Ty, Offset)));4446}4447case Builtin::BI__builtin_return_address: {4448Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),4449getContext().UnsignedIntTy);4450Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);4451return RValue::get(Builder.CreateCall(F, Depth));4452}4453case Builtin::BI_ReturnAddress: {4454Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);4455return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));4456}4457case Builtin::BI__builtin_frame_address: {4458Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),4459getContext().UnsignedIntTy);4460Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy);4461return RValue::get(Builder.CreateCall(F, Depth));4462}4463case Builtin::BI__builtin_extract_return_addr: {4464Value *Address = EmitScalarExpr(E->getArg(0));4465Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);4466return RValue::get(Result);4467}4468case Builtin::BI__builtin_frob_return_addr: {4469Value *Address = EmitScalarExpr(E->getArg(0));4470Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);4471return RValue::get(Result);4472}4473case Builtin::BI__builtin_dwarf_sp_column: {4474llvm::IntegerType *Ty4475= cast<llvm::IntegerType>(ConvertType(E->getType()));4476int Column = getTargetHooks().getDwarfEHStackPointer(CGM);4477if (Column == -1) {4478CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");4479return RValue::get(llvm::UndefValue::get(Ty));4480}4481return RValue::get(llvm::ConstantInt::get(Ty, Column, true));4482}4483case 
Builtin::BI__builtin_init_dwarf_reg_size_table: {4484Value *Address = EmitScalarExpr(E->getArg(0));4485if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))4486CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");4487return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));4488}4489case Builtin::BI__builtin_eh_return: {4490Value *Int = EmitScalarExpr(E->getArg(0));4491Value *Ptr = EmitScalarExpr(E->getArg(1));44924493llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());4494assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&4495"LLVM's __builtin_eh_return only supports 32- and 64-bit variants");4496Function *F =4497CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i324498: Intrinsic::eh_return_i64);4499Builder.CreateCall(F, {Int, Ptr});4500Builder.CreateUnreachable();45014502// We do need to preserve an insertion point.4503EmitBlock(createBasicBlock("builtin_eh_return.cont"));45044505return RValue::get(nullptr);4506}4507case Builtin::BI__builtin_unwind_init: {4508Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);4509Builder.CreateCall(F);4510return RValue::get(nullptr);4511}4512case Builtin::BI__builtin_extend_pointer: {4513// Extends a pointer to the size of an _Unwind_Word, which is4514// uint64_t on all platforms. 
Generally this gets poked into a4515// register and eventually used as an address, so if the4516// addressing registers are wider than pointers and the platform4517// doesn't implicitly ignore high-order bits when doing4518// addressing, we need to make sure we zext / sext based on4519// the platform's expectations.4520//4521// See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html45224523// Cast the pointer to intptr_t.4524Value *Ptr = EmitScalarExpr(E->getArg(0));4525Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");45264527// If that's 64 bits, we're done.4528if (IntPtrTy->getBitWidth() == 64)4529return RValue::get(Result);45304531// Otherwise, ask the codegen data what to do.4532if (getTargetHooks().extendPointerWithSExt())4533return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));4534else4535return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));4536}4537case Builtin::BI__builtin_setjmp: {4538// Buffer is a void**.4539Address Buf = EmitPointerWithAlignment(E->getArg(0));45404541// Store the frame pointer to the setjmp buffer.4542Value *FrameAddr = Builder.CreateCall(4543CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy),4544ConstantInt::get(Int32Ty, 0));4545Builder.CreateStore(FrameAddr, Buf);45464547// Store the stack pointer to the setjmp buffer.4548Value *StackAddr = Builder.CreateStackSave();4549assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType());45504551Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);4552Builder.CreateStore(StackAddr, StackSaveSlot);45534554// Call LLVM's EH setjmp, which is lightweight.4555Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);4556return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this)));4557}4558case Builtin::BI__builtin_longjmp: {4559Value *Buf = EmitScalarExpr(E->getArg(0));45604561// Call LLVM's EH longjmp, which is lightweight.4562Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);45634564// 
longjmp doesn't return; mark this as unreachable.4565Builder.CreateUnreachable();45664567// We do need to preserve an insertion point.4568EmitBlock(createBasicBlock("longjmp.cont"));45694570return RValue::get(nullptr);4571}4572case Builtin::BI__builtin_launder: {4573const Expr *Arg = E->getArg(0);4574QualType ArgTy = Arg->getType()->getPointeeType();4575Value *Ptr = EmitScalarExpr(Arg);4576if (TypeRequiresBuiltinLaunder(CGM, ArgTy))4577Ptr = Builder.CreateLaunderInvariantGroup(Ptr);45784579return RValue::get(Ptr);4580}4581case Builtin::BI__sync_fetch_and_add:4582case Builtin::BI__sync_fetch_and_sub:4583case Builtin::BI__sync_fetch_and_or:4584case Builtin::BI__sync_fetch_and_and:4585case Builtin::BI__sync_fetch_and_xor:4586case Builtin::BI__sync_fetch_and_nand:4587case Builtin::BI__sync_add_and_fetch:4588case Builtin::BI__sync_sub_and_fetch:4589case Builtin::BI__sync_and_and_fetch:4590case Builtin::BI__sync_or_and_fetch:4591case Builtin::BI__sync_xor_and_fetch:4592case Builtin::BI__sync_nand_and_fetch:4593case Builtin::BI__sync_val_compare_and_swap:4594case Builtin::BI__sync_bool_compare_and_swap:4595case Builtin::BI__sync_lock_test_and_set:4596case Builtin::BI__sync_lock_release:4597case Builtin::BI__sync_swap:4598llvm_unreachable("Shouldn't make it through sema");4599case Builtin::BI__sync_fetch_and_add_1:4600case Builtin::BI__sync_fetch_and_add_2:4601case Builtin::BI__sync_fetch_and_add_4:4602case Builtin::BI__sync_fetch_and_add_8:4603case Builtin::BI__sync_fetch_and_add_16:4604return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);4605case Builtin::BI__sync_fetch_and_sub_1:4606case Builtin::BI__sync_fetch_and_sub_2:4607case Builtin::BI__sync_fetch_and_sub_4:4608case Builtin::BI__sync_fetch_and_sub_8:4609case Builtin::BI__sync_fetch_and_sub_16:4610return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);4611case Builtin::BI__sync_fetch_and_or_1:4612case Builtin::BI__sync_fetch_and_or_2:4613case Builtin::BI__sync_fetch_and_or_4:4614case 
Builtin::BI__sync_fetch_and_or_8:4615case Builtin::BI__sync_fetch_and_or_16:4616return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);4617case Builtin::BI__sync_fetch_and_and_1:4618case Builtin::BI__sync_fetch_and_and_2:4619case Builtin::BI__sync_fetch_and_and_4:4620case Builtin::BI__sync_fetch_and_and_8:4621case Builtin::BI__sync_fetch_and_and_16:4622return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);4623case Builtin::BI__sync_fetch_and_xor_1:4624case Builtin::BI__sync_fetch_and_xor_2:4625case Builtin::BI__sync_fetch_and_xor_4:4626case Builtin::BI__sync_fetch_and_xor_8:4627case Builtin::BI__sync_fetch_and_xor_16:4628return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);4629case Builtin::BI__sync_fetch_and_nand_1:4630case Builtin::BI__sync_fetch_and_nand_2:4631case Builtin::BI__sync_fetch_and_nand_4:4632case Builtin::BI__sync_fetch_and_nand_8:4633case Builtin::BI__sync_fetch_and_nand_16:4634return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);46354636// Clang extensions: not overloaded yet.4637case Builtin::BI__sync_fetch_and_min:4638return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);4639case Builtin::BI__sync_fetch_and_max:4640return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);4641case Builtin::BI__sync_fetch_and_umin:4642return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);4643case Builtin::BI__sync_fetch_and_umax:4644return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);46454646case Builtin::BI__sync_add_and_fetch_1:4647case Builtin::BI__sync_add_and_fetch_2:4648case Builtin::BI__sync_add_and_fetch_4:4649case Builtin::BI__sync_add_and_fetch_8:4650case Builtin::BI__sync_add_and_fetch_16:4651return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,4652llvm::Instruction::Add);4653case Builtin::BI__sync_sub_and_fetch_1:4654case Builtin::BI__sync_sub_and_fetch_2:4655case Builtin::BI__sync_sub_and_fetch_4:4656case Builtin::BI__sync_sub_and_fetch_8:4657case Builtin::BI__sync_sub_and_fetch_16:4658return 
EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,4659llvm::Instruction::Sub);4660case Builtin::BI__sync_and_and_fetch_1:4661case Builtin::BI__sync_and_and_fetch_2:4662case Builtin::BI__sync_and_and_fetch_4:4663case Builtin::BI__sync_and_and_fetch_8:4664case Builtin::BI__sync_and_and_fetch_16:4665return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,4666llvm::Instruction::And);4667case Builtin::BI__sync_or_and_fetch_1:4668case Builtin::BI__sync_or_and_fetch_2:4669case Builtin::BI__sync_or_and_fetch_4:4670case Builtin::BI__sync_or_and_fetch_8:4671case Builtin::BI__sync_or_and_fetch_16:4672return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,4673llvm::Instruction::Or);4674case Builtin::BI__sync_xor_and_fetch_1:4675case Builtin::BI__sync_xor_and_fetch_2:4676case Builtin::BI__sync_xor_and_fetch_4:4677case Builtin::BI__sync_xor_and_fetch_8:4678case Builtin::BI__sync_xor_and_fetch_16:4679return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,4680llvm::Instruction::Xor);4681case Builtin::BI__sync_nand_and_fetch_1:4682case Builtin::BI__sync_nand_and_fetch_2:4683case Builtin::BI__sync_nand_and_fetch_4:4684case Builtin::BI__sync_nand_and_fetch_8:4685case Builtin::BI__sync_nand_and_fetch_16:4686return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,4687llvm::Instruction::And, true);46884689case Builtin::BI__sync_val_compare_and_swap_1:4690case Builtin::BI__sync_val_compare_and_swap_2:4691case Builtin::BI__sync_val_compare_and_swap_4:4692case Builtin::BI__sync_val_compare_and_swap_8:4693case Builtin::BI__sync_val_compare_and_swap_16:4694return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));46954696case Builtin::BI__sync_bool_compare_and_swap_1:4697case Builtin::BI__sync_bool_compare_and_swap_2:4698case Builtin::BI__sync_bool_compare_and_swap_4:4699case Builtin::BI__sync_bool_compare_and_swap_8:4700case Builtin::BI__sync_bool_compare_and_swap_16:4701return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));47024703case 
Builtin::BI__sync_swap_1:4704case Builtin::BI__sync_swap_2:4705case Builtin::BI__sync_swap_4:4706case Builtin::BI__sync_swap_8:4707case Builtin::BI__sync_swap_16:4708return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);47094710case Builtin::BI__sync_lock_test_and_set_1:4711case Builtin::BI__sync_lock_test_and_set_2:4712case Builtin::BI__sync_lock_test_and_set_4:4713case Builtin::BI__sync_lock_test_and_set_8:4714case Builtin::BI__sync_lock_test_and_set_16:4715return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);47164717case Builtin::BI__sync_lock_release_1:4718case Builtin::BI__sync_lock_release_2:4719case Builtin::BI__sync_lock_release_4:4720case Builtin::BI__sync_lock_release_8:4721case Builtin::BI__sync_lock_release_16: {4722Address Ptr = CheckAtomicAlignment(*this, E);4723QualType ElTy = E->getArg(0)->getType()->getPointeeType();47244725llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),4726getContext().getTypeSize(ElTy));4727llvm::StoreInst *Store =4728Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);4729Store->setAtomic(llvm::AtomicOrdering::Release);4730return RValue::get(nullptr);4731}47324733case Builtin::BI__sync_synchronize: {4734// We assume this is supposed to correspond to a C++0x-style4735// sequentially-consistent fence (i.e. this is only usable for4736// synchronization, not device I/O or anything like that). This intrinsic4737// is really badly designed in the sense that in theory, there isn't4738// any way to safely use it... 
but in practice, it mostly works4739// to use it with non-atomic loads and stores to get acquire/release4740// semantics.4741Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);4742return RValue::get(nullptr);4743}47444745case Builtin::BI__builtin_nontemporal_load:4746return RValue::get(EmitNontemporalLoad(*this, E));4747case Builtin::BI__builtin_nontemporal_store:4748return RValue::get(EmitNontemporalStore(*this, E));4749case Builtin::BI__c11_atomic_is_lock_free:4750case Builtin::BI__atomic_is_lock_free: {4751// Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the4752// __c11 builtin, ptr is 0 (indicating a properly-aligned object), since4753// _Atomic(T) is always properly-aligned.4754const char *LibCallName = "__atomic_is_lock_free";4755CallArgList Args;4756Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),4757getContext().getSizeType());4758if (BuiltinID == Builtin::BI__atomic_is_lock_free)4759Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),4760getContext().VoidPtrTy);4761else4762Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),4763getContext().VoidPtrTy);4764const CGFunctionInfo &FuncInfo =4765CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);4766llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);4767llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);4768return EmitCall(FuncInfo, CGCallee::forDirect(Func),4769ReturnValueSlot(), Args);4770}47714772case Builtin::BI__atomic_test_and_set: {4773// Look at the argument type to determine whether this is a volatile4774// operation. 
The parameter type is always volatile.4775QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();4776bool Volatile =4777PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();47784779Address Ptr =4780EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);47814782Value *NewVal = Builder.getInt8(1);4783Value *Order = EmitScalarExpr(E->getArg(1));4784if (isa<llvm::ConstantInt>(Order)) {4785int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();4786AtomicRMWInst *Result = nullptr;4787switch (ord) {4788case 0: // memory_order_relaxed4789default: // invalid order4790Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,4791llvm::AtomicOrdering::Monotonic);4792break;4793case 1: // memory_order_consume4794case 2: // memory_order_acquire4795Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,4796llvm::AtomicOrdering::Acquire);4797break;4798case 3: // memory_order_release4799Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,4800llvm::AtomicOrdering::Release);4801break;4802case 4: // memory_order_acq_rel48034804Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,4805llvm::AtomicOrdering::AcquireRelease);4806break;4807case 5: // memory_order_seq_cst4808Result = Builder.CreateAtomicRMW(4809llvm::AtomicRMWInst::Xchg, Ptr, NewVal,4810llvm::AtomicOrdering::SequentiallyConsistent);4811break;4812}4813Result->setVolatile(Volatile);4814return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));4815}48164817llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);48184819llvm::BasicBlock *BBs[5] = {4820createBasicBlock("monotonic", CurFn),4821createBasicBlock("acquire", CurFn),4822createBasicBlock("release", CurFn),4823createBasicBlock("acqrel", CurFn),4824createBasicBlock("seqcst", CurFn)4825};4826llvm::AtomicOrdering Orders[5] = {4827llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,4828llvm::AtomicOrdering::Release, 
llvm::AtomicOrdering::AcquireRelease,4829llvm::AtomicOrdering::SequentiallyConsistent};48304831Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);4832llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);48334834Builder.SetInsertPoint(ContBB);4835PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");48364837for (unsigned i = 0; i < 5; ++i) {4838Builder.SetInsertPoint(BBs[i]);4839AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,4840Ptr, NewVal, Orders[i]);4841RMW->setVolatile(Volatile);4842Result->addIncoming(RMW, BBs[i]);4843Builder.CreateBr(ContBB);4844}48454846SI->addCase(Builder.getInt32(0), BBs[0]);4847SI->addCase(Builder.getInt32(1), BBs[1]);4848SI->addCase(Builder.getInt32(2), BBs[1]);4849SI->addCase(Builder.getInt32(3), BBs[2]);4850SI->addCase(Builder.getInt32(4), BBs[3]);4851SI->addCase(Builder.getInt32(5), BBs[4]);48524853Builder.SetInsertPoint(ContBB);4854return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));4855}48564857case Builtin::BI__atomic_clear: {4858QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();4859bool Volatile =4860PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();48614862Address Ptr = EmitPointerWithAlignment(E->getArg(0));4863Ptr = Ptr.withElementType(Int8Ty);4864Value *NewVal = Builder.getInt8(0);4865Value *Order = EmitScalarExpr(E->getArg(1));4866if (isa<llvm::ConstantInt>(Order)) {4867int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();4868StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);4869switch (ord) {4870case 0: // memory_order_relaxed4871default: // invalid order4872Store->setOrdering(llvm::AtomicOrdering::Monotonic);4873break;4874case 3: // memory_order_release4875Store->setOrdering(llvm::AtomicOrdering::Release);4876break;4877case 5: // memory_order_seq_cst4878Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);4879break;4880}4881return RValue::get(nullptr);4882}48834884llvm::BasicBlock *ContBB = 
createBasicBlock("atomic.continue", CurFn);48854886llvm::BasicBlock *BBs[3] = {4887createBasicBlock("monotonic", CurFn),4888createBasicBlock("release", CurFn),4889createBasicBlock("seqcst", CurFn)4890};4891llvm::AtomicOrdering Orders[3] = {4892llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,4893llvm::AtomicOrdering::SequentiallyConsistent};48944895Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);4896llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);48974898for (unsigned i = 0; i < 3; ++i) {4899Builder.SetInsertPoint(BBs[i]);4900StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);4901Store->setOrdering(Orders[i]);4902Builder.CreateBr(ContBB);4903}49044905SI->addCase(Builder.getInt32(0), BBs[0]);4906SI->addCase(Builder.getInt32(3), BBs[1]);4907SI->addCase(Builder.getInt32(5), BBs[2]);49084909Builder.SetInsertPoint(ContBB);4910return RValue::get(nullptr);4911}49124913case Builtin::BI__atomic_thread_fence:4914case Builtin::BI__atomic_signal_fence:4915case Builtin::BI__c11_atomic_thread_fence:4916case Builtin::BI__c11_atomic_signal_fence: {4917llvm::SyncScope::ID SSID;4918if (BuiltinID == Builtin::BI__atomic_signal_fence ||4919BuiltinID == Builtin::BI__c11_atomic_signal_fence)4920SSID = llvm::SyncScope::SingleThread;4921else4922SSID = llvm::SyncScope::System;4923Value *Order = EmitScalarExpr(E->getArg(0));4924if (isa<llvm::ConstantInt>(Order)) {4925int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();4926switch (ord) {4927case 0: // memory_order_relaxed4928default: // invalid order4929break;4930case 1: // memory_order_consume4931case 2: // memory_order_acquire4932Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);4933break;4934case 3: // memory_order_release4935Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);4936break;4937case 4: // memory_order_acq_rel4938Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);4939break;4940case 5: // 
memory_order_seq_cst4941Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);4942break;4943}4944return RValue::get(nullptr);4945}49464947llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;4948AcquireBB = createBasicBlock("acquire", CurFn);4949ReleaseBB = createBasicBlock("release", CurFn);4950AcqRelBB = createBasicBlock("acqrel", CurFn);4951SeqCstBB = createBasicBlock("seqcst", CurFn);4952llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);49534954Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);4955llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);49564957Builder.SetInsertPoint(AcquireBB);4958Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);4959Builder.CreateBr(ContBB);4960SI->addCase(Builder.getInt32(1), AcquireBB);4961SI->addCase(Builder.getInt32(2), AcquireBB);49624963Builder.SetInsertPoint(ReleaseBB);4964Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);4965Builder.CreateBr(ContBB);4966SI->addCase(Builder.getInt32(3), ReleaseBB);49674968Builder.SetInsertPoint(AcqRelBB);4969Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);4970Builder.CreateBr(ContBB);4971SI->addCase(Builder.getInt32(4), AcqRelBB);49724973Builder.SetInsertPoint(SeqCstBB);4974Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);4975Builder.CreateBr(ContBB);4976SI->addCase(Builder.getInt32(5), SeqCstBB);49774978Builder.SetInsertPoint(ContBB);4979return RValue::get(nullptr);4980}49814982case Builtin::BI__builtin_signbit:4983case Builtin::BI__builtin_signbitf:4984case Builtin::BI__builtin_signbitl: {4985return RValue::get(4986Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),4987ConvertType(E->getType())));4988}4989case Builtin::BI__warn_memset_zero_len:4990return RValue::getIgnored();4991case Builtin::BI__annotation: {4992// Re-encode each wide string to UTF8 and make an MDString.4993SmallVector<Metadata *, 1> Strings;4994for (const Expr *Arg : E->arguments()) 
{4995const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());4996assert(Str->getCharByteWidth() == 2);4997StringRef WideBytes = Str->getBytes();4998std::string StrUtf8;4999if (!convertUTF16ToUTF8String(5000ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {5001CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");5002continue;5003}5004Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));5005}50065007// Build and MDTuple of MDStrings and emit the intrinsic call.5008llvm::Function *F =5009CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});5010MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);5011Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));5012return RValue::getIgnored();5013}5014case Builtin::BI__builtin_annotation: {5015llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));5016llvm::Function *F =5017CGM.getIntrinsic(llvm::Intrinsic::annotation,5018{AnnVal->getType(), CGM.ConstGlobalsPtrTy});50195020// Get the annotation string, go through casts. 
Sema requires this to be a5021// non-wide string literal, potentially casted, so the cast<> is safe.5022const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();5023StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();5024return RValue::get(5025EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr));5026}5027case Builtin::BI__builtin_addcb:5028case Builtin::BI__builtin_addcs:5029case Builtin::BI__builtin_addc:5030case Builtin::BI__builtin_addcl:5031case Builtin::BI__builtin_addcll:5032case Builtin::BI__builtin_subcb:5033case Builtin::BI__builtin_subcs:5034case Builtin::BI__builtin_subc:5035case Builtin::BI__builtin_subcl:5036case Builtin::BI__builtin_subcll: {50375038// We translate all of these builtins from expressions of the form:5039// int x = ..., y = ..., carryin = ..., carryout, result;5040// result = __builtin_addc(x, y, carryin, &carryout);5041//5042// to LLVM IR of the form:5043//5044// %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)5045// %tmpsum1 = extractvalue {i32, i1} %tmp1, 05046// %carry1 = extractvalue {i32, i1} %tmp1, 15047// %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,5048// i32 %carryin)5049// %result = extractvalue {i32, i1} %tmp2, 05050// %carry2 = extractvalue {i32, i1} %tmp2, 15051// %tmp3 = or i1 %carry1, %carry25052// %tmp4 = zext i1 %tmp3 to i325053// store i32 %tmp4, i32* %carryout50545055// Scalarize our inputs.5056llvm::Value *X = EmitScalarExpr(E->getArg(0));5057llvm::Value *Y = EmitScalarExpr(E->getArg(1));5058llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));5059Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));50605061// Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.5062llvm::Intrinsic::ID IntrinsicId;5063switch (BuiltinID) {5064default: llvm_unreachable("Unknown multiprecision builtin id.");5065case Builtin::BI__builtin_addcb:5066case Builtin::BI__builtin_addcs:5067case Builtin::BI__builtin_addc:5068case 
Builtin::BI__builtin_addcl:5069case Builtin::BI__builtin_addcll:5070IntrinsicId = llvm::Intrinsic::uadd_with_overflow;5071break;5072case Builtin::BI__builtin_subcb:5073case Builtin::BI__builtin_subcs:5074case Builtin::BI__builtin_subc:5075case Builtin::BI__builtin_subcl:5076case Builtin::BI__builtin_subcll:5077IntrinsicId = llvm::Intrinsic::usub_with_overflow;5078break;5079}50805081// Construct our resulting LLVM IR expression.5082llvm::Value *Carry1;5083llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,5084X, Y, Carry1);5085llvm::Value *Carry2;5086llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,5087Sum1, Carryin, Carry2);5088llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),5089X->getType());5090Builder.CreateStore(CarryOut, CarryOutPtr);5091return RValue::get(Sum2);5092}50935094case Builtin::BI__builtin_add_overflow:5095case Builtin::BI__builtin_sub_overflow:5096case Builtin::BI__builtin_mul_overflow: {5097const clang::Expr *LeftArg = E->getArg(0);5098const clang::Expr *RightArg = E->getArg(1);5099const clang::Expr *ResultArg = E->getArg(2);51005101clang::QualType ResultQTy =5102ResultArg->getType()->castAs<PointerType>()->getPointeeType();51035104WidthAndSignedness LeftInfo =5105getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());5106WidthAndSignedness RightInfo =5107getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());5108WidthAndSignedness ResultInfo =5109getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);51105111// Handle mixed-sign multiplication as a special case, because adding5112// runtime or backend support for our generic irgen would be too expensive.5113if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))5114return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,5115RightInfo, ResultArg, ResultQTy,5116ResultInfo);51175118if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo,5119ResultInfo))5120return 
EmitCheckedUnsignedMultiplySignedResult(5121*this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy,5122ResultInfo);51235124WidthAndSignedness EncompassingInfo =5125EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});51265127llvm::Type *EncompassingLLVMTy =5128llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);51295130llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);51315132llvm::Intrinsic::ID IntrinsicId;5133switch (BuiltinID) {5134default:5135llvm_unreachable("Unknown overflow builtin id.");5136case Builtin::BI__builtin_add_overflow:5137IntrinsicId = EncompassingInfo.Signed5138? llvm::Intrinsic::sadd_with_overflow5139: llvm::Intrinsic::uadd_with_overflow;5140break;5141case Builtin::BI__builtin_sub_overflow:5142IntrinsicId = EncompassingInfo.Signed5143? llvm::Intrinsic::ssub_with_overflow5144: llvm::Intrinsic::usub_with_overflow;5145break;5146case Builtin::BI__builtin_mul_overflow:5147IntrinsicId = EncompassingInfo.Signed5148? llvm::Intrinsic::smul_with_overflow5149: llvm::Intrinsic::umul_with_overflow;5150break;5151}51525153llvm::Value *Left = EmitScalarExpr(LeftArg);5154llvm::Value *Right = EmitScalarExpr(RightArg);5155Address ResultPtr = EmitPointerWithAlignment(ResultArg);51565157// Extend each operand to the encompassing type.5158Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);5159Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);51605161// Perform the operation on the extended values.5162llvm::Value *Overflow, *Result;5163Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);51645165if (EncompassingInfo.Width > ResultInfo.Width) {5166// The encompassing type is wider than the result type, so we need to5167// truncate it.5168llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);51695170// To see if the truncation caused an overflow, we will extend5171// the result and then compare it to the original result.5172llvm::Value 
*ResultTruncExt = Builder.CreateIntCast(5173ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);5174llvm::Value *TruncationOverflow =5175Builder.CreateICmpNE(Result, ResultTruncExt);51765177Overflow = Builder.CreateOr(Overflow, TruncationOverflow);5178Result = ResultTrunc;5179}51805181// Finally, store the result using the pointer.5182bool isVolatile =5183ResultArg->getType()->getPointeeType().isVolatileQualified();5184Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);51855186return RValue::get(Overflow);5187}51885189case Builtin::BI__builtin_uadd_overflow:5190case Builtin::BI__builtin_uaddl_overflow:5191case Builtin::BI__builtin_uaddll_overflow:5192case Builtin::BI__builtin_usub_overflow:5193case Builtin::BI__builtin_usubl_overflow:5194case Builtin::BI__builtin_usubll_overflow:5195case Builtin::BI__builtin_umul_overflow:5196case Builtin::BI__builtin_umull_overflow:5197case Builtin::BI__builtin_umulll_overflow:5198case Builtin::BI__builtin_sadd_overflow:5199case Builtin::BI__builtin_saddl_overflow:5200case Builtin::BI__builtin_saddll_overflow:5201case Builtin::BI__builtin_ssub_overflow:5202case Builtin::BI__builtin_ssubl_overflow:5203case Builtin::BI__builtin_ssubll_overflow:5204case Builtin::BI__builtin_smul_overflow:5205case Builtin::BI__builtin_smull_overflow:5206case Builtin::BI__builtin_smulll_overflow: {52075208// We translate all of these builtins directly to the relevant llvm IR node.52095210// Scalarize our inputs.5211llvm::Value *X = EmitScalarExpr(E->getArg(0));5212llvm::Value *Y = EmitScalarExpr(E->getArg(1));5213Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));52145215// Decide which of the overflow intrinsics we are lowering to:5216llvm::Intrinsic::ID IntrinsicId;5217switch (BuiltinID) {5218default: llvm_unreachable("Unknown overflow builtin id.");5219case Builtin::BI__builtin_uadd_overflow:5220case Builtin::BI__builtin_uaddl_overflow:5221case Builtin::BI__builtin_uaddll_overflow:5222IntrinsicId = 
llvm::Intrinsic::uadd_with_overflow;5223break;5224case Builtin::BI__builtin_usub_overflow:5225case Builtin::BI__builtin_usubl_overflow:5226case Builtin::BI__builtin_usubll_overflow:5227IntrinsicId = llvm::Intrinsic::usub_with_overflow;5228break;5229case Builtin::BI__builtin_umul_overflow:5230case Builtin::BI__builtin_umull_overflow:5231case Builtin::BI__builtin_umulll_overflow:5232IntrinsicId = llvm::Intrinsic::umul_with_overflow;5233break;5234case Builtin::BI__builtin_sadd_overflow:5235case Builtin::BI__builtin_saddl_overflow:5236case Builtin::BI__builtin_saddll_overflow:5237IntrinsicId = llvm::Intrinsic::sadd_with_overflow;5238break;5239case Builtin::BI__builtin_ssub_overflow:5240case Builtin::BI__builtin_ssubl_overflow:5241case Builtin::BI__builtin_ssubll_overflow:5242IntrinsicId = llvm::Intrinsic::ssub_with_overflow;5243break;5244case Builtin::BI__builtin_smul_overflow:5245case Builtin::BI__builtin_smull_overflow:5246case Builtin::BI__builtin_smulll_overflow:5247IntrinsicId = llvm::Intrinsic::smul_with_overflow;5248break;5249}525052515252llvm::Value *Carry;5253llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);5254Builder.CreateStore(Sum, SumOutPtr);52555256return RValue::get(Carry);5257}5258case Builtin::BIaddressof:5259case Builtin::BI__addressof:5260case Builtin::BI__builtin_addressof:5261return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));5262case Builtin::BI__builtin_function_start:5263return RValue::get(CGM.GetFunctionStart(5264E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext())));5265case Builtin::BI__builtin_operator_new:5266return EmitBuiltinNewDeleteCall(5267E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);5268case Builtin::BI__builtin_operator_delete:5269EmitBuiltinNewDeleteCall(5270E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);5271return RValue::get(nullptr);52725273case Builtin::BI__builtin_is_aligned:5274return EmitBuiltinIsAligned(E);5275case 
Builtin::BI__builtin_align_up:5276return EmitBuiltinAlignTo(E, true);5277case Builtin::BI__builtin_align_down:5278return EmitBuiltinAlignTo(E, false);52795280case Builtin::BI__noop:5281// __noop always evaluates to an integer literal zero.5282return RValue::get(ConstantInt::get(IntTy, 0));5283case Builtin::BI__builtin_call_with_static_chain: {5284const CallExpr *Call = cast<CallExpr>(E->getArg(0));5285const Expr *Chain = E->getArg(1);5286return EmitCall(Call->getCallee()->getType(),5287EmitCallee(Call->getCallee()), Call, ReturnValue,5288EmitScalarExpr(Chain));5289}5290case Builtin::BI_InterlockedExchange8:5291case Builtin::BI_InterlockedExchange16:5292case Builtin::BI_InterlockedExchange:5293case Builtin::BI_InterlockedExchangePointer:5294return RValue::get(5295EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));5296case Builtin::BI_InterlockedCompareExchangePointer:5297case Builtin::BI_InterlockedCompareExchangePointer_nf: {5298llvm::Type *RTy;5299llvm::IntegerType *IntType = IntegerType::get(5300getLLVMContext(), getContext().getTypeSize(E->getType()));53015302Address DestAddr = CheckAtomicAlignment(*this, E);53035304llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));5305RTy = Exchange->getType();5306Exchange = Builder.CreatePtrToInt(Exchange, IntType);53075308llvm::Value *Comparand =5309Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);53105311auto Ordering =5312BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?5313AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;53145315auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,5316Ordering, Ordering);5317Result->setVolatile(true);53185319return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,53200),5321RTy));5322}5323case Builtin::BI_InterlockedCompareExchange8:5324case Builtin::BI_InterlockedCompareExchange16:5325case Builtin::BI_InterlockedCompareExchange:5326case Builtin::BI_InterlockedCompareExchange64:5327return 
RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));5328case Builtin::BI_InterlockedIncrement16:5329case Builtin::BI_InterlockedIncrement:5330return RValue::get(5331EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));5332case Builtin::BI_InterlockedDecrement16:5333case Builtin::BI_InterlockedDecrement:5334return RValue::get(5335EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));5336case Builtin::BI_InterlockedAnd8:5337case Builtin::BI_InterlockedAnd16:5338case Builtin::BI_InterlockedAnd:5339return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));5340case Builtin::BI_InterlockedExchangeAdd8:5341case Builtin::BI_InterlockedExchangeAdd16:5342case Builtin::BI_InterlockedExchangeAdd:5343return RValue::get(5344EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));5345case Builtin::BI_InterlockedExchangeSub8:5346case Builtin::BI_InterlockedExchangeSub16:5347case Builtin::BI_InterlockedExchangeSub:5348return RValue::get(5349EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));5350case Builtin::BI_InterlockedOr8:5351case Builtin::BI_InterlockedOr16:5352case Builtin::BI_InterlockedOr:5353return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));5354case Builtin::BI_InterlockedXor8:5355case Builtin::BI_InterlockedXor16:5356case Builtin::BI_InterlockedXor:5357return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));53585359case Builtin::BI_bittest64:5360case Builtin::BI_bittest:5361case Builtin::BI_bittestandcomplement64:5362case Builtin::BI_bittestandcomplement:5363case Builtin::BI_bittestandreset64:5364case Builtin::BI_bittestandreset:5365case Builtin::BI_bittestandset64:5366case Builtin::BI_bittestandset:5367case Builtin::BI_interlockedbittestandreset:5368case Builtin::BI_interlockedbittestandreset64:5369case Builtin::BI_interlockedbittestandset64:5370case Builtin::BI_interlockedbittestandset:5371case Builtin::BI_interlockedbittestandset_acq:5372case Builtin::BI_interlockedbittestandset_rel:5373case 
Builtin::BI_interlockedbittestandset_nf:5374case Builtin::BI_interlockedbittestandreset_acq:5375case Builtin::BI_interlockedbittestandreset_rel:5376case Builtin::BI_interlockedbittestandreset_nf:5377return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));53785379// These builtins exist to emit regular volatile loads and stores not5380// affected by the -fms-volatile setting.5381case Builtin::BI__iso_volatile_load8:5382case Builtin::BI__iso_volatile_load16:5383case Builtin::BI__iso_volatile_load32:5384case Builtin::BI__iso_volatile_load64:5385return RValue::get(EmitISOVolatileLoad(*this, E));5386case Builtin::BI__iso_volatile_store8:5387case Builtin::BI__iso_volatile_store16:5388case Builtin::BI__iso_volatile_store32:5389case Builtin::BI__iso_volatile_store64:5390return RValue::get(EmitISOVolatileStore(*this, E));53915392case Builtin::BI__builtin_ptrauth_sign_constant:5393return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));53945395case Builtin::BI__builtin_ptrauth_auth:5396case Builtin::BI__builtin_ptrauth_auth_and_resign:5397case Builtin::BI__builtin_ptrauth_blend_discriminator:5398case Builtin::BI__builtin_ptrauth_sign_generic_data:5399case Builtin::BI__builtin_ptrauth_sign_unauthenticated:5400case Builtin::BI__builtin_ptrauth_strip: {5401// Emit the arguments.5402SmallVector<llvm::Value *, 5> Args;5403for (auto argExpr : E->arguments())5404Args.push_back(EmitScalarExpr(argExpr));54055406// Cast the value to intptr_t, saving its original type.5407llvm::Type *OrigValueType = Args[0]->getType();5408if (OrigValueType->isPointerTy())5409Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy);54105411switch (BuiltinID) {5412case Builtin::BI__builtin_ptrauth_auth_and_resign:5413if (Args[4]->getType()->isPointerTy())5414Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy);5415[[fallthrough]];54165417case Builtin::BI__builtin_ptrauth_auth:5418case Builtin::BI__builtin_ptrauth_sign_unauthenticated:5419if (Args[2]->getType()->isPointerTy())5420Args[2] 
= Builder.CreatePtrToInt(Args[2], IntPtrTy);5421break;54225423case Builtin::BI__builtin_ptrauth_sign_generic_data:5424if (Args[1]->getType()->isPointerTy())5425Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy);5426break;54275428case Builtin::BI__builtin_ptrauth_blend_discriminator:5429case Builtin::BI__builtin_ptrauth_strip:5430break;5431}54325433// Call the intrinsic.5434auto IntrinsicID = [&]() -> unsigned {5435switch (BuiltinID) {5436case Builtin::BI__builtin_ptrauth_auth:5437return llvm::Intrinsic::ptrauth_auth;5438case Builtin::BI__builtin_ptrauth_auth_and_resign:5439return llvm::Intrinsic::ptrauth_resign;5440case Builtin::BI__builtin_ptrauth_blend_discriminator:5441return llvm::Intrinsic::ptrauth_blend;5442case Builtin::BI__builtin_ptrauth_sign_generic_data:5443return llvm::Intrinsic::ptrauth_sign_generic;5444case Builtin::BI__builtin_ptrauth_sign_unauthenticated:5445return llvm::Intrinsic::ptrauth_sign;5446case Builtin::BI__builtin_ptrauth_strip:5447return llvm::Intrinsic::ptrauth_strip;5448}5449llvm_unreachable("bad ptrauth intrinsic");5450}();5451auto Intrinsic = CGM.getIntrinsic(IntrinsicID);5452llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args);54535454if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data &&5455BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator &&5456OrigValueType->isPointerTy()) {5457Result = Builder.CreateIntToPtr(Result, OrigValueType);5458}5459return RValue::get(Result);5460}54615462case Builtin::BI__exception_code:5463case Builtin::BI_exception_code:5464return RValue::get(EmitSEHExceptionCode());5465case Builtin::BI__exception_info:5466case Builtin::BI_exception_info:5467return RValue::get(EmitSEHExceptionInfo());5468case Builtin::BI__abnormal_termination:5469case Builtin::BI_abnormal_termination:5470return RValue::get(EmitSEHAbnormalTermination());5471case Builtin::BI_setjmpex:5472if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&5473E->getArg(0)->getType()->isPointerType())5474return 
EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);5475break;5476case Builtin::BI_setjmp:5477if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 &&5478E->getArg(0)->getType()->isPointerType()) {5479if (getTarget().getTriple().getArch() == llvm::Triple::x86)5480return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);5481else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)5482return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);5483return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);5484}5485break;54865487// C++ std:: builtins.5488case Builtin::BImove:5489case Builtin::BImove_if_noexcept:5490case Builtin::BIforward:5491case Builtin::BIforward_like:5492case Builtin::BIas_const:5493return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this));5494case Builtin::BI__GetExceptionInfo: {5495if (llvm::GlobalVariable *GV =5496CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))5497return RValue::get(GV);5498break;5499}55005501case Builtin::BI__fastfail:5502return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));55035504case Builtin::BI__builtin_coro_id:5505return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);5506case Builtin::BI__builtin_coro_promise:5507return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);5508case Builtin::BI__builtin_coro_resume:5509EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);5510return RValue::get(nullptr);5511case Builtin::BI__builtin_coro_frame:5512return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);5513case Builtin::BI__builtin_coro_noop:5514return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);5515case Builtin::BI__builtin_coro_free:5516return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);5517case Builtin::BI__builtin_coro_destroy:5518EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);5519return RValue::get(nullptr);5520case Builtin::BI__builtin_coro_done:5521return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);5522case 
Builtin::BI__builtin_coro_alloc:5523return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);5524case Builtin::BI__builtin_coro_begin:5525return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);5526case Builtin::BI__builtin_coro_end:5527return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);5528case Builtin::BI__builtin_coro_suspend:5529return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);5530case Builtin::BI__builtin_coro_size:5531return EmitCoroutineIntrinsic(E, Intrinsic::coro_size);5532case Builtin::BI__builtin_coro_align:5533return EmitCoroutineIntrinsic(E, Intrinsic::coro_align);55345535// OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions5536case Builtin::BIread_pipe:5537case Builtin::BIwrite_pipe: {5538Value *Arg0 = EmitScalarExpr(E->getArg(0)),5539*Arg1 = EmitScalarExpr(E->getArg(1));5540CGOpenCLRuntime OpenCLRT(CGM);5541Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));5542Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));55435544// Type of the generic packet parameter.5545unsigned GenericAS =5546getContext().getTargetAddressSpace(LangAS::opencl_generic);5547llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS);55485549// Testing which overloaded version we should generate the call for.5550if (2U == E->getNumArgs()) {5551const char *Name = (BuiltinID == Builtin::BIread_pipe) ? 
"__read_pipe_2"5552: "__write_pipe_2";5553// Creating a generic function type to be able to call with any builtin or5554// user defined type.5555llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};5556llvm::FunctionType *FTy = llvm::FunctionType::get(5557Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);5558Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);5559return RValue::get(5560EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),5561{Arg0, BCast, PacketSize, PacketAlign}));5562} else {5563assert(4 == E->getNumArgs() &&5564"Illegal number of parameters to pipe function");5565const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"5566: "__write_pipe_4";55675568llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,5569Int32Ty, Int32Ty};5570Value *Arg2 = EmitScalarExpr(E->getArg(2)),5571*Arg3 = EmitScalarExpr(E->getArg(3));5572llvm::FunctionType *FTy = llvm::FunctionType::get(5573Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);5574Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);5575// We know the third argument is an integer type, but we may need to cast5576// it to i32.5577if (Arg2->getType() != Int32Ty)5578Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);5579return RValue::get(5580EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),5581{Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));5582}5583}5584// OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write5585// functions5586case Builtin::BIreserve_read_pipe:5587case Builtin::BIreserve_write_pipe:5588case Builtin::BIwork_group_reserve_read_pipe:5589case Builtin::BIwork_group_reserve_write_pipe:5590case Builtin::BIsub_group_reserve_read_pipe:5591case Builtin::BIsub_group_reserve_write_pipe: {5592// Composing the mangled name for the function.5593const char *Name;5594if (BuiltinID == Builtin::BIreserve_read_pipe)5595Name = "__reserve_read_pipe";5596else if (BuiltinID == Builtin::BIreserve_write_pipe)5597Name = 
"__reserve_write_pipe";5598else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)5599Name = "__work_group_reserve_read_pipe";5600else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)5601Name = "__work_group_reserve_write_pipe";5602else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)5603Name = "__sub_group_reserve_read_pipe";5604else5605Name = "__sub_group_reserve_write_pipe";56065607Value *Arg0 = EmitScalarExpr(E->getArg(0)),5608*Arg1 = EmitScalarExpr(E->getArg(1));5609llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);5610CGOpenCLRuntime OpenCLRT(CGM);5611Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));5612Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));56135614// Building the generic function prototype.5615llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};5616llvm::FunctionType *FTy = llvm::FunctionType::get(5617ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);5618// We know the second argument is an integer type, but we may need to cast5619// it to i32.5620if (Arg1->getType() != Int32Ty)5621Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);5622return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),5623{Arg0, Arg1, PacketSize, PacketAlign}));5624}5625// OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write5626// functions5627case Builtin::BIcommit_read_pipe:5628case Builtin::BIcommit_write_pipe:5629case Builtin::BIwork_group_commit_read_pipe:5630case Builtin::BIwork_group_commit_write_pipe:5631case Builtin::BIsub_group_commit_read_pipe:5632case Builtin::BIsub_group_commit_write_pipe: {5633const char *Name;5634if (BuiltinID == Builtin::BIcommit_read_pipe)5635Name = "__commit_read_pipe";5636else if (BuiltinID == Builtin::BIcommit_write_pipe)5637Name = "__commit_write_pipe";5638else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)5639Name = "__work_group_commit_read_pipe";5640else if (BuiltinID == 
Builtin::BIwork_group_commit_write_pipe)5641Name = "__work_group_commit_write_pipe";5642else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)5643Name = "__sub_group_commit_read_pipe";5644else5645Name = "__sub_group_commit_write_pipe";56465647Value *Arg0 = EmitScalarExpr(E->getArg(0)),5648*Arg1 = EmitScalarExpr(E->getArg(1));5649CGOpenCLRuntime OpenCLRT(CGM);5650Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));5651Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));56525653// Building the generic function prototype.5654llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};5655llvm::FunctionType *FTy =5656llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),5657llvm::ArrayRef<llvm::Type *>(ArgTys), false);56585659return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),5660{Arg0, Arg1, PacketSize, PacketAlign}));5661}5662// OpenCL v2.0 s6.13.16.4 Built-in pipe query functions5663case Builtin::BIget_pipe_num_packets:5664case Builtin::BIget_pipe_max_packets: {5665const char *BaseName;5666const auto *PipeTy = E->getArg(0)->getType()->castAs<PipeType>();5667if (BuiltinID == Builtin::BIget_pipe_num_packets)5668BaseName = "__get_pipe_num_packets";5669else5670BaseName = "__get_pipe_max_packets";5671std::string Name = std::string(BaseName) +5672std::string(PipeTy->isReadOnly() ? 
"_ro" : "_wo");56735674// Building the generic function prototype.5675Value *Arg0 = EmitScalarExpr(E->getArg(0));5676CGOpenCLRuntime OpenCLRT(CGM);5677Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));5678Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));5679llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};5680llvm::FunctionType *FTy = llvm::FunctionType::get(5681Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);56825683return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),5684{Arg0, PacketSize, PacketAlign}));5685}56865687// OpenCL v2.0 s6.13.9 - Address space qualifier functions.5688case Builtin::BIto_global:5689case Builtin::BIto_local:5690case Builtin::BIto_private: {5691auto Arg0 = EmitScalarExpr(E->getArg(0));5692auto NewArgT = llvm::PointerType::get(5693getLLVMContext(),5694CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));5695auto NewRetT = llvm::PointerType::get(5696getLLVMContext(),5697CGM.getContext().getTargetAddressSpace(5698E->getType()->getPointeeType().getAddressSpace()));5699auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);5700llvm::Value *NewArg;5701if (Arg0->getType()->getPointerAddressSpace() !=5702NewArgT->getPointerAddressSpace())5703NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);5704else5705NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);5706auto NewName = std::string("__") + E->getDirectCallee()->getName().str();5707auto NewCall =5708EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});5709return RValue::get(Builder.CreateBitOrPointerCast(NewCall,5710ConvertType(E->getType())));5711}57125713// OpenCL v2.0, s6.13.17 - Enqueue kernel function.5714// Table 6.13.17.1 specifies four overload forms of enqueue_kernel.5715// The code below expands the builtin call to a call to one of the following5716// functions that an OpenCL runtime library will have to provide:5717// __enqueue_kernel_basic5718// __enqueue_kernel_varargs5719// 
__enqueue_kernel_basic_events5720// __enqueue_kernel_events_varargs5721case Builtin::BIenqueue_kernel: {5722StringRef Name; // Generated function call name5723unsigned NumArgs = E->getNumArgs();57245725llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);5726llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(5727getContext().getTargetAddressSpace(LangAS::opencl_generic));57285729llvm::Value *Queue = EmitScalarExpr(E->getArg(0));5730llvm::Value *Flags = EmitScalarExpr(E->getArg(1));5731LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));5732llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this);5733llvm::Type *RangeTy = NDRangeL.getAddress().getType();57345735if (NumArgs == 4) {5736// The most basic form of the call with parameters:5737// queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)5738Name = "__enqueue_kernel_basic";5739llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,5740GenericVoidPtrTy};5741llvm::FunctionType *FTy = llvm::FunctionType::get(5742Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);57435744auto Info =5745CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));5746llvm::Value *Kernel =5747Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);5748llvm::Value *Block =5749Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);57505751AttrBuilder B(Builder.getContext());5752B.addByValAttr(NDRangeL.getAddress().getElementType());5753llvm::AttributeList ByValAttrSet =5754llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);57555756auto RTCall =5757EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),5758{Queue, Flags, Range, Kernel, Block});5759RTCall->setAttributes(ByValAttrSet);5760return RValue::get(RTCall);5761}5762assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");57635764// Create a temporary array to hold the sizes of local pointer arguments5765// for the block. 
\p First is the position of the first size argument.5766auto CreateArrayForSizeVar = [=](unsigned First)5767-> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {5768llvm::APInt ArraySize(32, NumArgs - First);5769QualType SizeArrayTy = getContext().getConstantArrayType(5770getContext().getSizeType(), ArraySize, nullptr,5771ArraySizeModifier::Normal,5772/*IndexTypeQuals=*/0);5773auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");5774llvm::Value *TmpPtr = Tmp.getPointer();5775llvm::Value *TmpSize = EmitLifetimeStart(5776CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);5777llvm::Value *ElemPtr;5778// Each of the following arguments specifies the size of the corresponding5779// argument passed to the enqueued block.5780auto *Zero = llvm::ConstantInt::get(IntTy, 0);5781for (unsigned I = First; I < NumArgs; ++I) {5782auto *Index = llvm::ConstantInt::get(IntTy, I - First);5783auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr,5784{Zero, Index});5785if (I == First)5786ElemPtr = GEP;5787auto *V =5788Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);5789Builder.CreateAlignedStore(5790V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy));5791}5792return std::tie(ElemPtr, TmpSize, TmpPtr);5793};57945795// Could have events and/or varargs.5796if (E->getArg(3)->getType()->isBlockPointerType()) {5797// No events passed, but has variadic arguments.5798Name = "__enqueue_kernel_varargs";5799auto Info =5800CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));5801llvm::Value *Kernel =5802Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);5803auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);5804llvm::Value *ElemPtr, *TmpSize, *TmpPtr;5805std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);58065807// Create a vector of the arguments, as well as a constant value to5808// express to the runtime the number of variadic arguments.5809llvm::Value *const Args[] = {Queue, 
Flags,5810Range, Kernel,5811Block, ConstantInt::get(IntTy, NumArgs - 4),5812ElemPtr};5813llvm::Type *const ArgTys[] = {5814QueueTy, IntTy, RangeTy, GenericVoidPtrTy,5815GenericVoidPtrTy, IntTy, ElemPtr->getType()};58165817llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false);5818auto Call = RValue::get(5819EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args));5820if (TmpSize)5821EmitLifetimeEnd(TmpSize, TmpPtr);5822return Call;5823}5824// Any calls now have event arguments passed.5825if (NumArgs >= 7) {5826llvm::PointerType *PtrTy = llvm::PointerType::get(5827CGM.getLLVMContext(),5828CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));58295830llvm::Value *NumEvents =5831Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);58325833// Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments5834// to be a null pointer constant (including `0` literal), we can take it5835// into account and emit null pointer directly.5836llvm::Value *EventWaitList = nullptr;5837if (E->getArg(4)->isNullPointerConstant(5838getContext(), Expr::NPC_ValueDependentIsNotNull)) {5839EventWaitList = llvm::ConstantPointerNull::get(PtrTy);5840} else {5841EventWaitList =5842E->getArg(4)->getType()->isArrayType()5843? 
EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this)5844: EmitScalarExpr(E->getArg(4));5845// Convert to generic address space.5846EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy);5847}5848llvm::Value *EventRet = nullptr;5849if (E->getArg(5)->isNullPointerConstant(5850getContext(), Expr::NPC_ValueDependentIsNotNull)) {5851EventRet = llvm::ConstantPointerNull::get(PtrTy);5852} else {5853EventRet =5854Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy);5855}58565857auto Info =5858CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));5859llvm::Value *Kernel =5860Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);5861llvm::Value *Block =5862Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);58635864std::vector<llvm::Type *> ArgTys = {5865QueueTy, Int32Ty, RangeTy, Int32Ty,5866PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy};58675868std::vector<llvm::Value *> Args = {Queue, Flags, Range,5869NumEvents, EventWaitList, EventRet,5870Kernel, Block};58715872if (NumArgs == 7) {5873// Has events but no variadics.5874Name = "__enqueue_kernel_basic_events";5875llvm::FunctionType *FTy = llvm::FunctionType::get(5876Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);5877return RValue::get(5878EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name),5879llvm::ArrayRef<llvm::Value *>(Args)));5880}5881// Has event info and variadics5882// Pass the number of variadics to the runtime function too.5883Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));5884ArgTys.push_back(Int32Ty);5885Name = "__enqueue_kernel_events_varargs";58865887llvm::Value *ElemPtr, *TmpSize, *TmpPtr;5888std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);5889Args.push_back(ElemPtr);5890ArgTys.push_back(ElemPtr->getType());58915892llvm::FunctionType *FTy = llvm::FunctionType::get(5893Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);5894auto Call =5895RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, 
Name),5896llvm::ArrayRef<llvm::Value *>(Args)));5897if (TmpSize)5898EmitLifetimeEnd(TmpSize, TmpPtr);5899return Call;5900}5901llvm_unreachable("Unexpected enqueue_kernel signature");5902}5903// OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block5904// parameter.5905case Builtin::BIget_kernel_work_group_size: {5906llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(5907getContext().getTargetAddressSpace(LangAS::opencl_generic));5908auto Info =5909CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));5910Value *Kernel =5911Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);5912Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);5913return RValue::get(EmitRuntimeCall(5914CGM.CreateRuntimeFunction(5915llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},5916false),5917"__get_kernel_work_group_size_impl"),5918{Kernel, Arg}));5919}5920case Builtin::BIget_kernel_preferred_work_group_size_multiple: {5921llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(5922getContext().getTargetAddressSpace(LangAS::opencl_generic));5923auto Info =5924CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));5925Value *Kernel =5926Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);5927Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);5928return RValue::get(EmitRuntimeCall(5929CGM.CreateRuntimeFunction(5930llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},5931false),5932"__get_kernel_preferred_work_group_size_multiple_impl"),5933{Kernel, Arg}));5934}5935case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:5936case Builtin::BIget_kernel_sub_group_count_for_ndrange: {5937llvm::Type *GenericVoidPtrTy = Builder.getPtrTy(5938getContext().getTargetAddressSpace(LangAS::opencl_generic));5939LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));5940llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this);5941auto Info 
=5942CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));5943Value *Kernel =5944Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy);5945Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);5946const char *Name =5947BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange5948? "__get_kernel_max_sub_group_size_for_ndrange_impl"5949: "__get_kernel_sub_group_count_for_ndrange_impl";5950return RValue::get(EmitRuntimeCall(5951CGM.CreateRuntimeFunction(5952llvm::FunctionType::get(5953IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},5954false),5955Name),5956{NDRange, Kernel, Block}));5957}5958case Builtin::BI__builtin_store_half:5959case Builtin::BI__builtin_store_halff: {5960Value *Val = EmitScalarExpr(E->getArg(0));5961Address Address = EmitPointerWithAlignment(E->getArg(1));5962Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());5963Builder.CreateStore(HalfVal, Address);5964return RValue::get(nullptr);5965}5966case Builtin::BI__builtin_load_half: {5967Address Address = EmitPointerWithAlignment(E->getArg(0));5968Value *HalfVal = Builder.CreateLoad(Address);5969return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));5970}5971case Builtin::BI__builtin_load_halff: {5972Address Address = EmitPointerWithAlignment(E->getArg(0));5973Value *HalfVal = Builder.CreateLoad(Address);5974return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));5975}5976case Builtin::BI__builtin_printf:5977case Builtin::BIprintf:5978if (getTarget().getTriple().isNVPTX() ||5979getTarget().getTriple().isAMDGCN() ||5980(getTarget().getTriple().isSPIRV() &&5981getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {5982if (getLangOpts().OpenMPIsTargetDevice)5983return EmitOpenMPDevicePrintfCallExpr(E);5984if (getTarget().getTriple().isNVPTX())5985return EmitNVPTXDevicePrintfCallExpr(E);5986if ((getTarget().getTriple().isAMDGCN() ||5987getTarget().getTriple().isSPIRV()) 
&&5988getLangOpts().HIP)5989return EmitAMDGPUDevicePrintfCallExpr(E);5990}59915992break;5993case Builtin::BI__builtin_canonicalize:5994case Builtin::BI__builtin_canonicalizef:5995case Builtin::BI__builtin_canonicalizef16:5996case Builtin::BI__builtin_canonicalizel:5997return RValue::get(5998emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize));59996000case Builtin::BI__builtin_thread_pointer: {6001if (!getContext().getTargetInfo().isTLSSupported())6002CGM.ErrorUnsupported(E, "__builtin_thread_pointer");6003// Fall through - it's already mapped to the intrinsic by ClangBuiltin.6004break;6005}6006case Builtin::BI__builtin_os_log_format:6007return emitBuiltinOSLogFormat(*E);60086009case Builtin::BI__xray_customevent: {6010if (!ShouldXRayInstrumentFunction())6011return RValue::getIgnored();60126013if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(6014XRayInstrKind::Custom))6015return RValue::getIgnored();60166017if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())6018if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())6019return RValue::getIgnored();60206021Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);6022auto FTy = F->getFunctionType();6023auto Arg0 = E->getArg(0);6024auto Arg0Val = EmitScalarExpr(Arg0);6025auto Arg0Ty = Arg0->getType();6026auto PTy0 = FTy->getParamType(0);6027if (PTy0 != Arg0Val->getType()) {6028if (Arg0Ty->isArrayType())6029Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this);6030else6031Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);6032}6033auto Arg1 = EmitScalarExpr(E->getArg(1));6034auto PTy1 = FTy->getParamType(1);6035if (PTy1 != Arg1->getType())6036Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);6037return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));6038}60396040case Builtin::BI__xray_typedevent: {6041// TODO: There should be a way to always emit events even if the current6042// function is not instrumented. 
Losing events in a stream can cripple6043// a trace.6044if (!ShouldXRayInstrumentFunction())6045return RValue::getIgnored();60466047if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(6048XRayInstrKind::Typed))6049return RValue::getIgnored();60506051if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())6052if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())6053return RValue::getIgnored();60546055Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);6056auto FTy = F->getFunctionType();6057auto Arg0 = EmitScalarExpr(E->getArg(0));6058auto PTy0 = FTy->getParamType(0);6059if (PTy0 != Arg0->getType())6060Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);6061auto Arg1 = E->getArg(1);6062auto Arg1Val = EmitScalarExpr(Arg1);6063auto Arg1Ty = Arg1->getType();6064auto PTy1 = FTy->getParamType(1);6065if (PTy1 != Arg1Val->getType()) {6066if (Arg1Ty->isArrayType())6067Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);6068else6069Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);6070}6071auto Arg2 = EmitScalarExpr(E->getArg(2));6072auto PTy2 = FTy->getParamType(2);6073if (PTy2 != Arg2->getType())6074Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);6075return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));6076}60776078case Builtin::BI__builtin_ms_va_start:6079case Builtin::BI__builtin_ms_va_end:6080return RValue::get(6081EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),6082BuiltinID == Builtin::BI__builtin_ms_va_start));60836084case Builtin::BI__builtin_ms_va_copy: {6085// Lower this manually. We can't reliably determine whether or not any6086// given va_copy() is for a Win64 va_list from the calling convention6087// alone, because it's legal to do this from a System V ABI function.6088// With opaque pointer types, we won't have enough information in LLVM6089// IR to determine this from the argument types, either. 
Best to do it6090// now, while we have enough information.6091Address DestAddr = EmitMSVAListRef(E->getArg(0));6092Address SrcAddr = EmitMSVAListRef(E->getArg(1));60936094DestAddr = DestAddr.withElementType(Int8PtrTy);6095SrcAddr = SrcAddr.withElementType(Int8PtrTy);60966097Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");6098return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));6099}61006101case Builtin::BI__builtin_get_device_side_mangled_name: {6102auto Name = CGM.getCUDARuntime().getDeviceSideName(6103cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());6104auto Str = CGM.GetAddrOfConstantCString(Name, "");6105return RValue::get(Str.getPointer());6106}6107}61086109// If this is an alias for a lib function (e.g. __builtin_sin), emit6110// the call using the normal call path, but using the unmangled6111// version of the function name.6112if (getContext().BuiltinInfo.isLibFunction(BuiltinID))6113return emitLibraryCall(*this, FD, E,6114CGM.getBuiltinLibFunction(FD, BuiltinID));61156116// If this is a predefined lib function (e.g. 
malloc), emit the call6117// using exactly the normal call path.6118if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))6119return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));61206121// Check that a call to a target specific builtin has the correct target6122// features.6123// This is down here to avoid non-target specific builtins, however, if6124// generic builtins start to require generic target features then we6125// can move this up to the beginning of the function.6126checkTargetFeatures(E, FD);61276128if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))6129LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);61306131// See if we have a target specific intrinsic.6132StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);6133Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;6134StringRef Prefix =6135llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());6136if (!Prefix.empty()) {6137IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);6138if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&6139getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)6140IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);6141// NOTE we don't need to perform a compatibility flag check here since the6142// intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the6143// MS builtins via ALL_MS_LANGUAGES and are filtered earlier.6144if (IntrinsicID == Intrinsic::not_intrinsic)6145IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);6146}61476148if (IntrinsicID != Intrinsic::not_intrinsic) {6149SmallVector<Value*, 16> Args;61506151// Find out if any arguments are required to be integer constant6152// expressions.6153unsigned ICEArguments = 0;6154ASTContext::GetBuiltinTypeError Error;6155getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);6156assert(Error == ASTContext::GE_None && "Should not 
codegen an error");61576158Function *F = CGM.getIntrinsic(IntrinsicID);6159llvm::FunctionType *FTy = F->getFunctionType();61606161for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {6162Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);6163// If the intrinsic arg type is different from the builtin arg type6164// we need to do a bit cast.6165llvm::Type *PTy = FTy->getParamType(i);6166if (PTy != ArgValue->getType()) {6167// XXX - vector of pointers?6168if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {6169if (PtrTy->getAddressSpace() !=6170ArgValue->getType()->getPointerAddressSpace()) {6171ArgValue = Builder.CreateAddrSpaceCast(6172ArgValue, llvm::PointerType::get(getLLVMContext(),6173PtrTy->getAddressSpace()));6174}6175}61766177// Cast vector type (e.g., v256i32) to x86_amx, this only happen6178// in amx intrinsics.6179if (PTy->isX86_AMXTy())6180ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,6181{ArgValue->getType()}, {ArgValue});6182else6183ArgValue = Builder.CreateBitCast(ArgValue, PTy);6184}61856186Args.push_back(ArgValue);6187}61886189Value *V = Builder.CreateCall(F, Args);6190QualType BuiltinRetType = E->getType();61916192llvm::Type *RetTy = VoidTy;6193if (!BuiltinRetType->isVoidType())6194RetTy = ConvertType(BuiltinRetType);61956196if (RetTy != V->getType()) {6197// XXX - vector of pointers?6198if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {6199if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {6200V = Builder.CreateAddrSpaceCast(6201V, llvm::PointerType::get(getLLVMContext(),6202PtrTy->getAddressSpace()));6203}6204}62056206// Cast x86_amx to vector type (e.g., v256i32), this only happen6207// in amx intrinsics.6208if (V->getType()->isX86_AMXTy())6209V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},6210{V});6211else6212V = Builder.CreateBitCast(V, RetTy);6213}62146215if (RetTy->isVoidTy())6216return RValue::get(nullptr);62176218return 
RValue::get(V);6219}62206221// Some target-specific builtins can have aggregate return values, e.g.6222// __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force6223// ReturnValue to be non-null, so that the target-specific emission code can6224// always just emit into it.6225TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());6226if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {6227Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");6228ReturnValue = ReturnValueSlot(DestPtr, false);6229}62306231// Now see if we can emit a target-specific builtin.6232if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {6233switch (EvalKind) {6234case TEK_Scalar:6235if (V->getType()->isVoidTy())6236return RValue::get(nullptr);6237return RValue::get(V);6238case TEK_Aggregate:6239return RValue::getAggregate(ReturnValue.getAddress(),6240ReturnValue.isVolatile());6241case TEK_Complex:6242llvm_unreachable("No current target builtin returns complex");6243}6244llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");6245}62466247// EmitHLSLBuiltinExpr will check getLangOpts().HLSL6248if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E))6249return RValue::get(V);62506251if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice)6252return EmitHipStdParUnsupportedBuiltin(this, FD);62536254ErrorUnsupported(E, "builtin function");62556256// Unknown builtin, for now just dump it out and return undef.6257return GetUndefRValue(E->getType());6258}62596260static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,6261unsigned BuiltinID, const CallExpr *E,6262ReturnValueSlot ReturnValue,6263llvm::Triple::ArchType Arch) {6264// When compiling in HipStdPar mode we have to be conservative in rejecting6265// target specific features in the FE, and defer the possible error to the6266// AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is6267// referenced by an accelerator executable function, we emit an error.6268// Returning nullptr 
here leads to the builtin being handled in6269// EmitStdParUnsupportedBuiltin.6270if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice &&6271Arch != CGF->getTarget().getTriple().getArch())6272return nullptr;62736274switch (Arch) {6275case llvm::Triple::arm:6276case llvm::Triple::armeb:6277case llvm::Triple::thumb:6278case llvm::Triple::thumbeb:6279return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);6280case llvm::Triple::aarch64:6281case llvm::Triple::aarch64_32:6282case llvm::Triple::aarch64_be:6283return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);6284case llvm::Triple::bpfeb:6285case llvm::Triple::bpfel:6286return CGF->EmitBPFBuiltinExpr(BuiltinID, E);6287case llvm::Triple::x86:6288case llvm::Triple::x86_64:6289return CGF->EmitX86BuiltinExpr(BuiltinID, E);6290case llvm::Triple::ppc:6291case llvm::Triple::ppcle:6292case llvm::Triple::ppc64:6293case llvm::Triple::ppc64le:6294return CGF->EmitPPCBuiltinExpr(BuiltinID, E);6295case llvm::Triple::r600:6296case llvm::Triple::amdgcn:6297return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);6298case llvm::Triple::systemz:6299return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);6300case llvm::Triple::nvptx:6301case llvm::Triple::nvptx64:6302return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);6303case llvm::Triple::wasm32:6304case llvm::Triple::wasm64:6305return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);6306case llvm::Triple::hexagon:6307return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);6308case llvm::Triple::riscv32:6309case llvm::Triple::riscv64:6310return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue);6311case llvm::Triple::spirv64:6312if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA)6313return nullptr;6314return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);6315default:6316return nullptr;6317}6318}63196320Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,6321const CallExpr *E,6322ReturnValueSlot ReturnValue) {6323if 
(getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {6324assert(getContext().getAuxTargetInfo() && "Missing aux target info");6325return EmitTargetArchBuiltinExpr(6326this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,6327ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());6328}63296330return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,6331getTarget().getTriple().getArch());6332}63336334static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF,6335NeonTypeFlags TypeFlags,6336bool HasLegalHalfType = true,6337bool V1Ty = false,6338bool AllowBFloatArgsAndRet = true) {6339int IsQuad = TypeFlags.isQuad();6340switch (TypeFlags.getEltType()) {6341case NeonTypeFlags::Int8:6342case NeonTypeFlags::Poly8:6343return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));6344case NeonTypeFlags::Int16:6345case NeonTypeFlags::Poly16:6346return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));6347case NeonTypeFlags::BFloat16:6348if (AllowBFloatArgsAndRet)6349return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));6350else6351return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));6352case NeonTypeFlags::Float16:6353if (HasLegalHalfType)6354return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));6355else6356return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));6357case NeonTypeFlags::Int32:6358return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));6359case NeonTypeFlags::Int64:6360case NeonTypeFlags::Poly64:6361return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 
1 : (1 << IsQuad));6362case NeonTypeFlags::Poly128:6363// FIXME: i128 and f128 doesn't get fully support in Clang and llvm.6364// There is a lot of i128 and f128 API missing.6365// so we use v16i8 to represent poly128 and get pattern matched.6366return llvm::FixedVectorType::get(CGF->Int8Ty, 16);6367case NeonTypeFlags::Float32:6368return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));6369case NeonTypeFlags::Float64:6370return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));6371}6372llvm_unreachable("Unknown vector element type!");6373}63746375static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,6376NeonTypeFlags IntTypeFlags) {6377int IsQuad = IntTypeFlags.isQuad();6378switch (IntTypeFlags.getEltType()) {6379case NeonTypeFlags::Int16:6380return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));6381case NeonTypeFlags::Int32:6382return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));6383case NeonTypeFlags::Int64:6384return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));6385default:6386llvm_unreachable("Type can't be converted to floating-point!");6387}6388}63896390Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,6391const ElementCount &Count) {6392Value *SV = llvm::ConstantVector::getSplat(Count, C);6393return Builder.CreateShuffleVector(V, V, SV, "lane");6394}63956396Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {6397ElementCount EC = cast<llvm::VectorType>(V->getType())->getElementCount();6398return EmitNeonSplat(V, C, EC);6399}64006401Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,6402const char *name,6403unsigned shift, bool rightshift) {6404unsigned j = 0;6405for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();6406ai != ae; ++ai, ++j) {6407if (F->isConstrainedFPIntrinsic())6408if (ai->getType()->isMetadataTy())6409continue;6410if (shift > 0 && shift == j)6411Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), 
rightshift);6412else6413Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);6414}64156416if (F->isConstrainedFPIntrinsic())6417return Builder.CreateConstrainedFPCall(F, Ops, name);6418else6419return Builder.CreateCall(F, Ops, name);6420}64216422Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,6423bool neg) {6424int SV = cast<ConstantInt>(V)->getSExtValue();6425return ConstantInt::get(Ty, neg ? -SV : SV);6426}64276428// Right-shift a vector by a constant.6429Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,6430llvm::Type *Ty, bool usgn,6431const char *name) {6432llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);64336434int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();6435int EltSize = VTy->getScalarSizeInBits();64366437Vec = Builder.CreateBitCast(Vec, Ty);64386439// lshr/ashr are undefined when the shift amount is equal to the vector6440// element size.6441if (ShiftAmt == EltSize) {6442if (usgn) {6443// Right-shifting an unsigned value by its size yields 0.6444return llvm::ConstantAggregateZero::get(VTy);6445} else {6446// Right-shifting a signed value by its size is equivalent6447// to a shift of size-1.6448--ShiftAmt;6449Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);6450}6451}64526453Shift = EmitNeonShiftVector(Shift, Ty, false);6454if (usgn)6455return Builder.CreateLShr(Vec, Shift, name);6456else6457return Builder.CreateAShr(Vec, Shift, name);6458}64596460enum {6461AddRetType = (1 << 0),6462Add1ArgType = (1 << 1),6463Add2ArgTypes = (1 << 2),64646465VectorizeRetType = (1 << 3),6466VectorizeArgTypes = (1 << 4),64676468InventFloatType = (1 << 5),6469UnsignedAlts = (1 << 6),64706471Use64BitVectors = (1 << 7),6472Use128BitVectors = (1 << 8),64736474Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,6475VectorRet = AddRetType | VectorizeRetType,6476VectorRetGetArgs01 =6477AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,6478FpCmpzModifiers =6479AddRetType | VectorizeRetType | Add1ArgType 
| InventFloatType6480};64816482namespace {6483struct ARMVectorIntrinsicInfo {6484const char *NameHint;6485unsigned BuiltinID;6486unsigned LLVMIntrinsic;6487unsigned AltLLVMIntrinsic;6488uint64_t TypeModifier;64896490bool operator<(unsigned RHSBuiltinID) const {6491return BuiltinID < RHSBuiltinID;6492}6493bool operator<(const ARMVectorIntrinsicInfo &TE) const {6494return BuiltinID < TE.BuiltinID;6495}6496};6497} // end anonymous namespace64986499#define NEONMAP0(NameBase) \6500{ #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }65016502#define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \6503{ #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \6504Intrinsic::LLVMIntrinsic, 0, TypeModifier }65056506#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \6507{ #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \6508Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \6509TypeModifier }65106511static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {6512NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),6513NEONMAP0(splat_lane_v),6514NEONMAP0(splat_laneq_v),6515NEONMAP0(splatq_lane_v),6516NEONMAP0(splatq_laneq_v),6517NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),6518NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),6519NEONMAP1(vabs_v, arm_neon_vabs, 0),6520NEONMAP1(vabsq_v, arm_neon_vabs, 0),6521NEONMAP0(vadd_v),6522NEONMAP0(vaddhn_v),6523NEONMAP0(vaddq_v),6524NEONMAP1(vaesdq_u8, arm_neon_aesd, 0),6525NEONMAP1(vaeseq_u8, arm_neon_aese, 0),6526NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0),6527NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0),6528NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0),6529NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0),6530NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0),6531NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0),6532NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0),6533NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),6534NEONMAP1(vbslq_v, arm_neon_vbsl, 
AddRetType),6535NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),6536NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),6537NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),6538NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),6539NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType),6540NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType),6541NEONMAP1(vcaddq_rot270_f64, arm_neon_vcadd_rot270, Add1ArgType),6542NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType),6543NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType),6544NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType),6545NEONMAP1(vcage_v, arm_neon_vacge, 0),6546NEONMAP1(vcageq_v, arm_neon_vacge, 0),6547NEONMAP1(vcagt_v, arm_neon_vacgt, 0),6548NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),6549NEONMAP1(vcale_v, arm_neon_vacge, 0),6550NEONMAP1(vcaleq_v, arm_neon_vacge, 0),6551NEONMAP1(vcalt_v, arm_neon_vacgt, 0),6552NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),6553NEONMAP0(vceqz_v),6554NEONMAP0(vceqzq_v),6555NEONMAP0(vcgez_v),6556NEONMAP0(vcgezq_v),6557NEONMAP0(vcgtz_v),6558NEONMAP0(vcgtzq_v),6559NEONMAP0(vclez_v),6560NEONMAP0(vclezq_v),6561NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),6562NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),6563NEONMAP0(vcltz_v),6564NEONMAP0(vcltzq_v),6565NEONMAP1(vclz_v, ctlz, Add1ArgType),6566NEONMAP1(vclzq_v, ctlz, Add1ArgType),6567NEONMAP1(vcnt_v, ctpop, Add1ArgType),6568NEONMAP1(vcntq_v, ctpop, Add1ArgType),6569NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),6570NEONMAP0(vcvt_f16_s16),6571NEONMAP0(vcvt_f16_u16),6572NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),6573NEONMAP0(vcvt_f32_v),6574NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0),6575NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0),6576NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),6577NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0),6578NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),6579NEONMAP1(vcvt_n_s64_v, 
arm_neon_vcvtfp2fxs, 0),6580NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0),6581NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),6582NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),6583NEONMAP0(vcvt_s16_f16),6584NEONMAP0(vcvt_s32_v),6585NEONMAP0(vcvt_s64_v),6586NEONMAP0(vcvt_u16_f16),6587NEONMAP0(vcvt_u32_v),6588NEONMAP0(vcvt_u64_v),6589NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0),6590NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),6591NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),6592NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0),6593NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),6594NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),6595NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0),6596NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),6597NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),6598NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0),6599NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),6600NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),6601NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0),6602NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0),6603NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),6604NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),6605NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0),6606NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),6607NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),6608NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0),6609NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),6610NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),6611NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0),6612NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),6613NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),6614NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0),6615NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),6616NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),6617NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0),6618NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),6619NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),6620NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0),6621NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),6622NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),6623NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 
0),6624NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),6625NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),6626NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0),6627NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),6628NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),6629NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0),6630NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),6631NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),6632NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0),6633NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),6634NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),6635NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0),6636NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),6637NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),6638NEONMAP0(vcvtq_f16_s16),6639NEONMAP0(vcvtq_f16_u16),6640NEONMAP0(vcvtq_f32_v),6641NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0),6642NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0),6643NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),6644NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0),6645NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),6646NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),6647NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0),6648NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),6649NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),6650NEONMAP0(vcvtq_s16_f16),6651NEONMAP0(vcvtq_s32_v),6652NEONMAP0(vcvtq_s64_v),6653NEONMAP0(vcvtq_u16_f16),6654NEONMAP0(vcvtq_u32_v),6655NEONMAP0(vcvtq_u64_v),6656NEONMAP1(vdot_s32, arm_neon_sdot, 0),6657NEONMAP1(vdot_u32, arm_neon_udot, 0),6658NEONMAP1(vdotq_s32, arm_neon_sdot, 0),6659NEONMAP1(vdotq_u32, arm_neon_udot, 0),6660NEONMAP0(vext_v),6661NEONMAP0(vextq_v),6662NEONMAP0(vfma_v),6663NEONMAP0(vfmaq_v),6664NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),6665NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),6666NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),6667NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | 
UnsignedAlts),6668NEONMAP0(vld1_dup_v),6669NEONMAP1(vld1_v, arm_neon_vld1, 0),6670NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),6671NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),6672NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),6673NEONMAP0(vld1q_dup_v),6674NEONMAP1(vld1q_v, arm_neon_vld1, 0),6675NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),6676NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),6677NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),6678NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),6679NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),6680NEONMAP1(vld2_v, arm_neon_vld2, 0),6681NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),6682NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),6683NEONMAP1(vld2q_v, arm_neon_vld2, 0),6684NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),6685NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),6686NEONMAP1(vld3_v, arm_neon_vld3, 0),6687NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),6688NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),6689NEONMAP1(vld3q_v, arm_neon_vld3, 0),6690NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),6691NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),6692NEONMAP1(vld4_v, arm_neon_vld4, 0),6693NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),6694NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),6695NEONMAP1(vld4q_v, arm_neon_vld4, 0),6696NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),6697NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),6698NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),6699NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),6700NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),6701NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),6702NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),6703NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),6704NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0),6705NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0),6706NEONMAP0(vmovl_v),6707NEONMAP0(vmovn_v),6708NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),6709NEONMAP0(vmull_v),6710NEONMAP1(vmulq_v, arm_neon_vmulp, 
Add1ArgType),6711NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),6712NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),6713NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),6714NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),6715NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),6716NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),6717NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),6718NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),6719NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),6720NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),6721NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),6722NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts),6723NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0),6724NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0),6725NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),6726NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),6727NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),6728NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),6729NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),6730NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),6731NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),6732NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType),6733NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType),6734NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType),6735NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType),6736NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType),6737NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType),6738NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType),6739NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType),6740NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),6741NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),6742NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, 
Add1ArgType | UnsignedAlts),6743NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),6744NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),6745NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),6746NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),6747NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),6748NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),6749NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),6750NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),6751NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts),6752NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),6753NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),6754NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),6755NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),6756NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),6757NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),6758NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),6759NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),6760NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),6761NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),6762NEONMAP0(vrndi_v),6763NEONMAP0(vrndiq_v),6764NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),6765NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),6766NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),6767NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),6768NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),6769NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),6770NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),6771NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),6772NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),6773NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),6774NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | 
UnsignedAlts),6775NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),6776NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),6777NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),6778NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),6779NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),6780NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),6781NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),6782NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0),6783NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0),6784NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0),6785NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0),6786NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0),6787NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0),6788NEONMAP0(vshl_n_v),6789NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),6790NEONMAP0(vshll_n_v),6791NEONMAP0(vshlq_n_v),6792NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),6793NEONMAP0(vshr_n_v),6794NEONMAP0(vshrn_n_v),6795NEONMAP0(vshrq_n_v),6796NEONMAP1(vst1_v, arm_neon_vst1, 0),6797NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),6798NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),6799NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),6800NEONMAP1(vst1q_v, arm_neon_vst1, 0),6801NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),6802NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),6803NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),6804NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),6805NEONMAP1(vst2_v, arm_neon_vst2, 0),6806NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),6807NEONMAP1(vst2q_v, arm_neon_vst2, 0),6808NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),6809NEONMAP1(vst3_v, arm_neon_vst3, 0),6810NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),6811NEONMAP1(vst3q_v, arm_neon_vst3, 0),6812NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),6813NEONMAP1(vst4_v, arm_neon_vst4, 0),6814NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),6815NEONMAP1(vst4q_v, arm_neon_vst4, 
0),6816NEONMAP0(vsubhn_v),6817NEONMAP0(vtrn_v),6818NEONMAP0(vtrnq_v),6819NEONMAP0(vtst_v),6820NEONMAP0(vtstq_v),6821NEONMAP1(vusdot_s32, arm_neon_usdot, 0),6822NEONMAP1(vusdotq_s32, arm_neon_usdot, 0),6823NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0),6824NEONMAP0(vuzp_v),6825NEONMAP0(vuzpq_v),6826NEONMAP0(vzip_v),6827NEONMAP0(vzipq_v)6828};68296830/* AArch64SIMDIntrinsicMap: static table of ARMVectorIntrinsicInfo records, one per builtin name, written with the NEONMAP0/NEONMAP1/NEONMAP2 macros.
 *   NEONMAP0(name)                        - names a builtin and no intrinsic.
 *   NEONMAP1(name, intrinsic, flags)      - names a builtin, one intrinsic and a flag expression.
 *   NEONMAP2(name, intrinsic, alt, flags) - names a builtin, two intrinsics and a flag expression.
 * The intrinsics named here are aarch64_neon_* and aarch64_crypto_* plus the generic ctlz and ctpop.
 * Flag expressions used in this table are 0, Add1ArgType, AddRetType and UnsignedAlts (alone or OR-ed together); the macros and flags are defined outside this table.
 * Entries are listed in ascending order of builtin name, with __a64_vcvtq_low_bf16_f32 first.
 * NOTE(review): presumably lookups rely on this ordering - confirm before inserting or reordering entries.
 * NOTE(review): vcale(q)_v / vcalt(q)_v name the same intrinsics as vcage(q)_v / vcagt(q)_v (facge / facgt); presumably the caller swaps the operands - confirm.
 * NOTE(review): vrshr(q)_n_v names the urshl / srshl intrinsics; presumably the caller adjusts the shift amount - confirm.
 */static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {6831NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0),6832NEONMAP0(splat_lane_v),6833NEONMAP0(splat_laneq_v),6834NEONMAP0(splatq_lane_v),6835NEONMAP0(splatq_laneq_v),6836NEONMAP1(vabs_v, aarch64_neon_abs, 0),6837NEONMAP1(vabsq_v, aarch64_neon_abs, 0),6838NEONMAP0(vadd_v),6839NEONMAP0(vaddhn_v),6840NEONMAP0(vaddq_p128),6841NEONMAP0(vaddq_v),6842NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0),6843NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0),6844NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0),6845NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0),6846NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6847NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6848NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6849NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6850NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6851NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6852NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6853NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),6854NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0),6855NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0),6856NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0),6857NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0),6858NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 
0),6859NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),6860NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),6861NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),6862NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),6863NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType),6864NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType),6865NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType),6866NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType),6867NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType),6868NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType),6869NEONMAP1(vcage_v, aarch64_neon_facge, 0),6870NEONMAP1(vcageq_v, aarch64_neon_facge, 0),6871NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),6872NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),6873NEONMAP1(vcale_v, aarch64_neon_facge, 0),6874NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),6875NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),6876NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),6877NEONMAP0(vceqz_v),6878NEONMAP0(vceqzq_v),6879NEONMAP0(vcgez_v),6880NEONMAP0(vcgezq_v),6881NEONMAP0(vcgtz_v),6882NEONMAP0(vcgtzq_v),6883NEONMAP0(vclez_v),6884NEONMAP0(vclezq_v),6885NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),6886NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),6887NEONMAP0(vcltz_v),6888NEONMAP0(vcltzq_v),6889NEONMAP1(vclz_v, ctlz, Add1ArgType),6890NEONMAP1(vclzq_v, ctlz, Add1ArgType),6891NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType),6892NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType),6893NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),6894NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),6895NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),6896NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),6897NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),6898NEONMAP1(vcmla_rot90_f32, 
aarch64_neon_vcmla_rot90, Add1ArgType),6899NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType),6900NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType),6901NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType),6902NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType),6903NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType),6904NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType),6905NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType),6906NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType),6907NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType),6908NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType),6909NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType),6910NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType),6911NEONMAP1(vcnt_v, ctpop, Add1ArgType),6912NEONMAP1(vcntq_v, ctpop, Add1ArgType),6913NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),6914NEONMAP0(vcvt_f16_s16),6915NEONMAP0(vcvt_f16_u16),6916NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),6917NEONMAP0(vcvt_f32_v),6918NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),6919NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0),6920NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),6921NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),6922NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),6923NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),6924NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),6925NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),6926NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),6927NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),6928NEONMAP0(vcvtq_f16_s16),6929NEONMAP0(vcvtq_f16_u16),6930NEONMAP0(vcvtq_f32_v),6931NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0),6932NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0),6933NEONMAP1(vcvtq_n_f16_u16, 
aarch64_neon_vcvtfxu2fp, 0),6934NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),6935NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),6936NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0),6937NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),6938NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),6939NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0),6940NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),6941NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),6942NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),6943NEONMAP1(vdot_s32, aarch64_neon_sdot, 0),6944NEONMAP1(vdot_u32, aarch64_neon_udot, 0),6945NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0),6946NEONMAP1(vdotq_u32, aarch64_neon_udot, 0),6947NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6948NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6949NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6950NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6951NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6952NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6953NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6954NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),6955NEONMAP0(vext_v),6956NEONMAP0(vextq_v),6957NEONMAP0(vfma_v),6958NEONMAP0(vfmaq_v),6959NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0),6960NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0),6961NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0),6962NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0),6963NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0),6964NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0),6965NEONMAP1(vfmlslq_high_f16, 
aarch64_neon_fmlsl2, 0),6966NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0),6967NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),6968NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),6969NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),6970NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),6971NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),6972NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),6973NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),6974NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),6975NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),6976NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),6977NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0),6978NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0),6979NEONMAP0(vmovl_v),6980NEONMAP0(vmovn_v),6981NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),6982NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),6983NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),6984NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),6985NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),6986NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),6987NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),6988NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),6989NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),6990NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),6991NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),6992NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),6993NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0),6994NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0),6995NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),6996NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0),6997NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0),6998NEONMAP1(vqdmulhq_v, 
aarch64_neon_sqdmulh, Add1ArgType),6999NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),7000NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),7001NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),7002NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),7003NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),7004NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType),7005NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType),7006NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType),7007NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType),7008NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType),7009NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType),7010NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType),7011NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType),7012NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0),7013NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),7014NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),7015NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0),7016NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0),7017NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),7018NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),7019NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),7020NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),7021NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),7022NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),7023NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),7024NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),7025NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),7026NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),7027NEONMAP2(vqsubq_v, aarch64_neon_uqsub, 
aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),7028NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),7029NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0),7030NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),7031NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),7032NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),7033NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),7034NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),7035NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),7036NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),7037NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),7038NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),7039NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),7040NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),7041NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),7042NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),7043NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),7044NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),7045NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),7046NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),7047NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),7048NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),7049NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),7050NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),7051NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),7052NEONMAP0(vrndi_v),7053NEONMAP0(vrndiq_v),7054NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),7055NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),7056NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),7057NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, 
UnsignedAlts),7058NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),7059NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),7060NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),7061NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),7062NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),7063NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0),7064NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0),7065NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0),7066NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0),7067NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0),7068NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0),7069NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0),7070NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0),7071NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0),7072NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0),7073NEONMAP0(vshl_n_v),7074NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),7075NEONMAP0(vshll_n_v),7076NEONMAP0(vshlq_n_v),7077NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),7078NEONMAP0(vshr_n_v),7079NEONMAP0(vshrn_n_v),7080NEONMAP0(vshrq_n_v),7081NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0),7082NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0),7083NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0),7084NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0),7085NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0),7086NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0),7087NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0),7088NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0),7089NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0),7090NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),7091NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),7092NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),7093NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),7094NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),7095NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 
0),7096NEONMAP0(vsubhn_v),7097NEONMAP0(vtst_v),7098NEONMAP0(vtstq_v),7099NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0),7100NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0),7101NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0),7102NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0),7103};71047105static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {7106NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),7107NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),7108NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),7109NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),7110NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),7111NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),7112NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),7113NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),7114NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),7115NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),7116NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),7117NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),7118NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),7119NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),7120NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),7121NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),7122NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),7123NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),7124NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),7125NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),7126NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),7127NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),7128NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),7129NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | 
Add1ArgType),7130NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),7131NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),7132NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),7133NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),7134NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),7135NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),7136NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),7137NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),7138NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),7139NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),7140NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0),7141NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),7142NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),7143NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),7144NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),7145NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),7146NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),7147NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),7148NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),7149NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),7150NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),7151NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),7152NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),7153NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),7154NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),7155NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | 
Add1ArgType),7156NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),7157NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),7158NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),7159NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),7160NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),7161NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),7162NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),7163NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),7164NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),7165NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),7166NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),7167NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),7168NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),7169NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),7170NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),7171NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),7172NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),7173NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),7174NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),7175NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),7176NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),7177NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),7178NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),7179NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),7180NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),7181NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),7182NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),7183NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),7184NEONMAP1(vpaddd_u64, 
aarch64_neon_uaddv, AddRetType | Add1ArgType),7185NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),7186NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),7187NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),7188NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),7189NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),7190NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),7191NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),7192NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),7193NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),7194NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),7195NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),7196NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),7197NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),7198NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),7199NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),7200NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),7201NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),7202NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),7203NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),7204NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),7205NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),7206NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),7207NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),7208NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),7209NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),7210NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),7211NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | 
Use64BitVectors),7212NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),7213NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),7214NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),7215NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),7216NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),7217NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),7218NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),7219NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),7220NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),7221NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),7222NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors),7223NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType),7224NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors),7225NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType),7226NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),7227NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),7228NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),7229NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),7230NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),7231NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),7232NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),7233NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),7234NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),7235NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),7236NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),7237NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),7238NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | 
Use64BitVectors),7239NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),7240NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),7241NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),7242NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),7243NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),7244NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),7245NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),7246NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),7247NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),7248NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),7249NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),7250NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),7251NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),7252NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),7253NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),7254NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),7255NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),7256NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),7257NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),7258NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),7259NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),7260NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),7261NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),7262NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),7263NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),7264NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),7265NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | 
Use64BitVectors),7266NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),7267NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),7268NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),7269NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),7270NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),7271NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),7272NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),7273NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),7274NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),7275NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),7276NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),7277NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),7278NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),7279NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),7280NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),7281NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),7282NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),7283NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),7284NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),7285NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),7286NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),7287NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),7288NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),7289NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),7290NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),7291NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),7292NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),7293NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),7294NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),7295NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),7296NEONMAP1(vslid_n_u64, aarch64_neon_vsli, 
Vectorize1ArgType),7297NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),7298NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),7299NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),7300NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),7301NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),7302NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),7303NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),7304NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),7305NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),7306NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),7307// FP16 scalar intrinsics go here.7308NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),7309NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),7310NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),7311NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),7312NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),7313NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),7314NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),7315NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),7316NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),7317NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),7318NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),7319NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),7320NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),7321NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),7322NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),7323NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | 
Add1ArgType),
  NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
};

// Some intrinsics are equivalent for codegen.
//
// Each entry pairs two NEON builtin IDs: a type-specific builtin (the _f16 and
// _bf16 variants, and the typed vldap1/vstl1 lane variants) and the builtin
// whose codegen it shares.
// NOTE(review): the code that consults this table is outside this excerpt;
// presumably .first is the ID being looked up and .second the ID it is handled
// as -- confirm at the use site.
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {
  { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, },
  { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, },
  { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, },
  { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, },
  { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, },
  { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, },
  { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, },
  { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, },
  { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, },
  { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, },
  { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, },
  { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, },
  { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, },
  { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, },
  { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, },
  { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, },
  { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, },
  { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, },
  { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, },
  { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, },
  { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, },
  { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, },
  { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, },
  { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, },
  { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, },
  { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, },
  { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, },
  { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, },
  { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, },
  { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, },
  { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, },
  { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, },
  { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v },
  { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v },
  { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v },
  { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v },
  { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v },
  { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v },
  { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v },
  { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v },
  { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v },
  { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v },
  { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v },
  { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v },
  { NEON::BI__builtin_neon_vld2_lane_bf16, NEON::BI__builtin_neon_vld2_lane_v },
  { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v },
  { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v },
  { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v },
  { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v },
  { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v },
  { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v },
  { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v },
  { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v },
  { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v },
  { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v },
  { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v },
  { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v },
  { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v },
  { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v },
  { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v },
  { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, },
  { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, },
  { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, },
  { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, },
  { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, },
  { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, },
  { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, },
  { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, },
  { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, },
  { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, },
  { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, },
  { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, },
  { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, },
  { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, },
  { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, },
  { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, },
  { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, },
  { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, },
  { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, },
  { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, },
  { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, },
  { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, },
  { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, },
  { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, },
  { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, },
  { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, },
  { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, },
  { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, },
  { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, },
  { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, },
  { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, },
  { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, },
  { NEON::BI__builtin_neon_vrndnq_f16, NEON::BI__builtin_neon_vrndnq_v, },
  { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, },
  { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, },
  { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, },
  { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, },
  { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, },
  { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, },
  { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, },
  { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, },
  { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, },
  { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, },
  { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, },
  { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v },
  { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v },
  { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v },
  { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v },
  { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v },
  { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v },
  { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v },
  { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v },
  { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v },
  { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v },
  { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v },
  { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v },
  { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v },
  { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v },
  { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v },
  { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v },
  { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v },
  { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v },
  { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v },
  { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v },
  // The mangling rules cause us to have one ID for each type for vldap1(q)_lane
  // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an
  // arbitrary one to be handled as the canonical variation.
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

// The NEON table-entry macros are not needed past this point.
#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

// SVEMAP1 expands to the brace initializer of one ARMVectorIntrinsicInfo
// entry: the stringified name, the SVE builtin ID, the LLVM intrinsic ID, a
// literal 0, and the type modifier.
#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
        TypeModifier \
  }

// SVEMAP2 has the same shape as SVEMAP1 but puts 0 in the LLVM intrinsic
// position.
#define SVEMAP2(NameBase, TypeModifier) \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
// The entries of this table come from the two files included between the
// braces, with GET_SVE_LLVM_INTRINSIC_MAP defined around the includes.
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2

// SMEMAP1 is the SME counterpart of SVEMAP1: same initializer layout, with the
// builtin ID taken from the SME namespace.
#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
  { \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
        TypeModifier \
  }

#define SMEMAP2(NameBase, TypeModifier) \
  { \
#NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }7521static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {7522#define GET_SME_LLVM_INTRINSIC_MAP7523#include "clang/Basic/arm_sme_builtin_cg.inc"7524#undef GET_SME_LLVM_INTRINSIC_MAP7525};75267527#undef SMEMAP17528#undef SMEMAP275297530static bool NEONSIMDIntrinsicsProvenSorted = false;75317532static bool AArch64SIMDIntrinsicsProvenSorted = false;7533static bool AArch64SISDIntrinsicsProvenSorted = false;7534static bool AArch64SVEIntrinsicsProvenSorted = false;7535static bool AArch64SMEIntrinsicsProvenSorted = false;75367537static const ARMVectorIntrinsicInfo *7538findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,7539unsigned BuiltinID, bool &MapProvenSorted) {75407541#ifndef NDEBUG7542if (!MapProvenSorted) {7543assert(llvm::is_sorted(IntrinsicMap));7544MapProvenSorted = true;7545}7546#endif75477548const ARMVectorIntrinsicInfo *Builtin =7549llvm::lower_bound(IntrinsicMap, BuiltinID);75507551if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)7552return Builtin;75537554return nullptr;7555}75567557Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,7558unsigned Modifier,7559llvm::Type *ArgType,7560const CallExpr *E) {7561int VectorSize = 0;7562if (Modifier & Use64BitVectors)7563VectorSize = 64;7564else if (Modifier & Use128BitVectors)7565VectorSize = 128;75667567// Return type.7568SmallVector<llvm::Type *, 3> Tys;7569if (Modifier & AddRetType) {7570llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));7571if (Modifier & VectorizeRetType)7572Ty = llvm::FixedVectorType::get(7573Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);75747575Tys.push_back(Ty);7576}75777578// Arguments.7579if (Modifier & VectorizeArgTypes) {7580int Elts = VectorSize ? 
VectorSize / ArgType->getPrimitiveSizeInBits() : 1;7581ArgType = llvm::FixedVectorType::get(ArgType, Elts);7582}75837584if (Modifier & (Add1ArgType | Add2ArgTypes))7585Tys.push_back(ArgType);75867587if (Modifier & Add2ArgTypes)7588Tys.push_back(ArgType);75897590if (Modifier & InventFloatType)7591Tys.push_back(FloatTy);75927593return CGM.getIntrinsic(IntrinsicID, Tys);7594}75957596static Value *EmitCommonNeonSISDBuiltinExpr(7597CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,7598SmallVectorImpl<Value *> &Ops, const CallExpr *E) {7599unsigned BuiltinID = SISDInfo.BuiltinID;7600unsigned int Int = SISDInfo.LLVMIntrinsic;7601unsigned Modifier = SISDInfo.TypeModifier;7602const char *s = SISDInfo.NameHint;76037604switch (BuiltinID) {7605case NEON::BI__builtin_neon_vcled_s64:7606case NEON::BI__builtin_neon_vcled_u64:7607case NEON::BI__builtin_neon_vcles_f32:7608case NEON::BI__builtin_neon_vcled_f64:7609case NEON::BI__builtin_neon_vcltd_s64:7610case NEON::BI__builtin_neon_vcltd_u64:7611case NEON::BI__builtin_neon_vclts_f32:7612case NEON::BI__builtin_neon_vcltd_f64:7613case NEON::BI__builtin_neon_vcales_f32:7614case NEON::BI__builtin_neon_vcaled_f64:7615case NEON::BI__builtin_neon_vcalts_f32:7616case NEON::BI__builtin_neon_vcaltd_f64:7617// Only one direction of comparisons actually exist, cmle is actually a cmge7618// with swapped operands. 
The table gives us the right intrinsic but we7619// still need to do the swap.7620std::swap(Ops[0], Ops[1]);7621break;7622}76237624assert(Int && "Generic code assumes a valid intrinsic");76257626// Determine the type(s) of this overloaded AArch64 intrinsic.7627const Expr *Arg = E->getArg(0);7628llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());7629Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);76307631int j = 0;7632ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);7633for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();7634ai != ae; ++ai, ++j) {7635llvm::Type *ArgTy = ai->getType();7636if (Ops[j]->getType()->getPrimitiveSizeInBits() ==7637ArgTy->getPrimitiveSizeInBits())7638continue;76397640assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());7641// The constant argument to an _n_ intrinsic always has Int32Ty, so truncate7642// it before inserting.7643Ops[j] = CGF.Builder.CreateTruncOrBitCast(7644Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());7645Ops[j] =7646CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);7647}76487649Value *Result = CGF.EmitNeonCall(F, Ops, s);7650llvm::Type *ResultType = CGF.ConvertType(E->getType());7651if (ResultType->getPrimitiveSizeInBits().getFixedValue() <7652Result->getType()->getPrimitiveSizeInBits().getFixedValue())7653return CGF.Builder.CreateExtractElement(Result, C0);76547655return CGF.Builder.CreateBitCast(Result, ResultType, s);7656}76577658Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(7659unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,7660const char *NameHint, unsigned Modifier, const CallExpr *E,7661SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,7662llvm::Triple::ArchType Arch) {7663// Get the last argument, which specifies the vector type.7664const Expr *Arg = E->getArg(E->getNumArgs() - 1);7665std::optional<llvm::APSInt> NeonTypeConst =7666Arg->getIntegerConstantExpr(getContext());7667if 
(!NeonTypeConst)7668return nullptr;76697670// Determine the type of this overloaded NEON intrinsic.7671NeonTypeFlags Type(NeonTypeConst->getZExtValue());7672bool Usgn = Type.isUnsigned();7673bool Quad = Type.isQuad();7674const bool HasLegalHalfType = getTarget().hasLegalHalfType();7675const bool AllowBFloatArgsAndRet =7676getTargetHooks().getABIInfo().allowBFloatArgsAndRet();76777678llvm::FixedVectorType *VTy =7679GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);7680llvm::Type *Ty = VTy;7681if (!Ty)7682return nullptr;76837684auto getAlignmentValue32 = [&](Address addr) -> Value* {7685return Builder.getInt32(addr.getAlignment().getQuantity());7686};76877688unsigned Int = LLVMIntrinsic;7689if ((Modifier & UnsignedAlts) && !Usgn)7690Int = AltLLVMIntrinsic;76917692switch (BuiltinID) {7693default: break;7694case NEON::BI__builtin_neon_splat_lane_v:7695case NEON::BI__builtin_neon_splat_laneq_v:7696case NEON::BI__builtin_neon_splatq_lane_v:7697case NEON::BI__builtin_neon_splatq_laneq_v: {7698auto NumElements = VTy->getElementCount();7699if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)7700NumElements = NumElements * 2;7701if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)7702NumElements = NumElements.divideCoefficientBy(2);77037704Ops[0] = Builder.CreateBitCast(Ops[0], VTy);7705return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);7706}7707case NEON::BI__builtin_neon_vpadd_v:7708case NEON::BI__builtin_neon_vpaddq_v:7709// We don't allow fp/int overloading of intrinsics.7710if (VTy->getElementType()->isFloatingPointTy() &&7711Int == Intrinsic::aarch64_neon_addp)7712Int = Intrinsic::aarch64_neon_faddp;7713break;7714case NEON::BI__builtin_neon_vabs_v:7715case NEON::BI__builtin_neon_vabsq_v:7716if (VTy->getElementType()->isFloatingPointTy())7717return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");7718return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");7719case 
NEON::BI__builtin_neon_vadd_v:7720case NEON::BI__builtin_neon_vaddq_v: {7721llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ? 16 : 8);7722Ops[0] = Builder.CreateBitCast(Ops[0], VTy);7723Ops[1] = Builder.CreateBitCast(Ops[1], VTy);7724Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);7725return Builder.CreateBitCast(Ops[0], Ty);7726}7727case NEON::BI__builtin_neon_vaddhn_v: {7728llvm::FixedVectorType *SrcTy =7729llvm::FixedVectorType::getExtendedElementVectorType(VTy);77307731// %sum = add <4 x i32> %lhs, %rhs7732Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);7733Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);7734Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");77357736// %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>7737Constant *ShiftAmt =7738ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);7739Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");77407741// %res = trunc <4 x i32> %high to <4 x i16>7742return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");7743}7744case NEON::BI__builtin_neon_vcale_v:7745case NEON::BI__builtin_neon_vcaleq_v:7746case NEON::BI__builtin_neon_vcalt_v:7747case NEON::BI__builtin_neon_vcaltq_v:7748std::swap(Ops[0], Ops[1]);7749[[fallthrough]];7750case NEON::BI__builtin_neon_vcage_v:7751case NEON::BI__builtin_neon_vcageq_v:7752case NEON::BI__builtin_neon_vcagt_v:7753case NEON::BI__builtin_neon_vcagtq_v: {7754llvm::Type *Ty;7755switch (VTy->getScalarSizeInBits()) {7756default: llvm_unreachable("unexpected type");7757case 32:7758Ty = FloatTy;7759break;7760case 64:7761Ty = DoubleTy;7762break;7763case 16:7764Ty = HalfTy;7765break;7766}7767auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());7768llvm::Type *Tys[] = { VTy, VecFlt };7769Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);7770return EmitNeonCall(F, Ops, NameHint);7771}7772case NEON::BI__builtin_neon_vceqz_v:7773case NEON::BI__builtin_neon_vceqzq_v:7774return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, 
ICmpInst::FCMP_OEQ,7775ICmpInst::ICMP_EQ, "vceqz");7776case NEON::BI__builtin_neon_vcgez_v:7777case NEON::BI__builtin_neon_vcgezq_v:7778return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,7779ICmpInst::ICMP_SGE, "vcgez");7780case NEON::BI__builtin_neon_vclez_v:7781case NEON::BI__builtin_neon_vclezq_v:7782return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,7783ICmpInst::ICMP_SLE, "vclez");7784case NEON::BI__builtin_neon_vcgtz_v:7785case NEON::BI__builtin_neon_vcgtzq_v:7786return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,7787ICmpInst::ICMP_SGT, "vcgtz");7788case NEON::BI__builtin_neon_vcltz_v:7789case NEON::BI__builtin_neon_vcltzq_v:7790return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,7791ICmpInst::ICMP_SLT, "vcltz");7792case NEON::BI__builtin_neon_vclz_v:7793case NEON::BI__builtin_neon_vclzq_v:7794// We generate target-independent intrinsic, which needs a second argument7795// for whether or not clz of zero is undefined; on ARM it isn't.7796Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));7797break;7798case NEON::BI__builtin_neon_vcvt_f32_v:7799case NEON::BI__builtin_neon_vcvtq_f32_v:7800Ops[0] = Builder.CreateBitCast(Ops[0], Ty);7801Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),7802HasLegalHalfType);7803return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")7804: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");7805case NEON::BI__builtin_neon_vcvt_f16_s16:7806case NEON::BI__builtin_neon_vcvt_f16_u16:7807case NEON::BI__builtin_neon_vcvtq_f16_s16:7808case NEON::BI__builtin_neon_vcvtq_f16_u16:7809Ops[0] = Builder.CreateBitCast(Ops[0], Ty);7810Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),7811HasLegalHalfType);7812return Usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt")7813: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");7814case NEON::BI__builtin_neon_vcvt_n_f16_s16:7815case NEON::BI__builtin_neon_vcvt_n_f16_u16:7816case NEON::BI__builtin_neon_vcvtq_n_f16_s16:7817case NEON::BI__builtin_neon_vcvtq_n_f16_u16: {7818llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };7819Function *F = CGM.getIntrinsic(Int, Tys);7820return EmitNeonCall(F, Ops, "vcvt_n");7821}7822case NEON::BI__builtin_neon_vcvt_n_f32_v:7823case NEON::BI__builtin_neon_vcvt_n_f64_v:7824case NEON::BI__builtin_neon_vcvtq_n_f32_v:7825case NEON::BI__builtin_neon_vcvtq_n_f64_v: {7826llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };7827Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;7828Function *F = CGM.getIntrinsic(Int, Tys);7829return EmitNeonCall(F, Ops, "vcvt_n");7830}7831case NEON::BI__builtin_neon_vcvt_n_s16_f16:7832case NEON::BI__builtin_neon_vcvt_n_s32_v:7833case NEON::BI__builtin_neon_vcvt_n_u16_f16:7834case NEON::BI__builtin_neon_vcvt_n_u32_v:7835case NEON::BI__builtin_neon_vcvt_n_s64_v:7836case NEON::BI__builtin_neon_vcvt_n_u64_v:7837case NEON::BI__builtin_neon_vcvtq_n_s16_f16:7838case NEON::BI__builtin_neon_vcvtq_n_s32_v:7839case NEON::BI__builtin_neon_vcvtq_n_u16_f16:7840case NEON::BI__builtin_neon_vcvtq_n_u32_v:7841case NEON::BI__builtin_neon_vcvtq_n_s64_v:7842case NEON::BI__builtin_neon_vcvtq_n_u64_v: {7843llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };7844Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);7845return EmitNeonCall(F, Ops, "vcvt_n");7846}7847case NEON::BI__builtin_neon_vcvt_s32_v:7848case NEON::BI__builtin_neon_vcvt_u32_v:7849case NEON::BI__builtin_neon_vcvt_s64_v:7850case NEON::BI__builtin_neon_vcvt_u64_v:7851case NEON::BI__builtin_neon_vcvt_s16_f16:7852case NEON::BI__builtin_neon_vcvt_u16_f16:7853case NEON::BI__builtin_neon_vcvtq_s32_v:7854case NEON::BI__builtin_neon_vcvtq_u32_v:7855case NEON::BI__builtin_neon_vcvtq_s64_v:7856case NEON::BI__builtin_neon_vcvtq_u64_v:7857case 
NEON::BI__builtin_neon_vcvtq_s16_f16:7858case NEON::BI__builtin_neon_vcvtq_u16_f16: {7859Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));7860return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")7861: Builder.CreateFPToSI(Ops[0], Ty, "vcvt");7862}7863case NEON::BI__builtin_neon_vcvta_s16_f16:7864case NEON::BI__builtin_neon_vcvta_s32_v:7865case NEON::BI__builtin_neon_vcvta_s64_v:7866case NEON::BI__builtin_neon_vcvta_u16_f16:7867case NEON::BI__builtin_neon_vcvta_u32_v:7868case NEON::BI__builtin_neon_vcvta_u64_v:7869case NEON::BI__builtin_neon_vcvtaq_s16_f16:7870case NEON::BI__builtin_neon_vcvtaq_s32_v:7871case NEON::BI__builtin_neon_vcvtaq_s64_v:7872case NEON::BI__builtin_neon_vcvtaq_u16_f16:7873case NEON::BI__builtin_neon_vcvtaq_u32_v:7874case NEON::BI__builtin_neon_vcvtaq_u64_v:7875case NEON::BI__builtin_neon_vcvtn_s16_f16:7876case NEON::BI__builtin_neon_vcvtn_s32_v:7877case NEON::BI__builtin_neon_vcvtn_s64_v:7878case NEON::BI__builtin_neon_vcvtn_u16_f16:7879case NEON::BI__builtin_neon_vcvtn_u32_v:7880case NEON::BI__builtin_neon_vcvtn_u64_v:7881case NEON::BI__builtin_neon_vcvtnq_s16_f16:7882case NEON::BI__builtin_neon_vcvtnq_s32_v:7883case NEON::BI__builtin_neon_vcvtnq_s64_v:7884case NEON::BI__builtin_neon_vcvtnq_u16_f16:7885case NEON::BI__builtin_neon_vcvtnq_u32_v:7886case NEON::BI__builtin_neon_vcvtnq_u64_v:7887case NEON::BI__builtin_neon_vcvtp_s16_f16:7888case NEON::BI__builtin_neon_vcvtp_s32_v:7889case NEON::BI__builtin_neon_vcvtp_s64_v:7890case NEON::BI__builtin_neon_vcvtp_u16_f16:7891case NEON::BI__builtin_neon_vcvtp_u32_v:7892case NEON::BI__builtin_neon_vcvtp_u64_v:7893case NEON::BI__builtin_neon_vcvtpq_s16_f16:7894case NEON::BI__builtin_neon_vcvtpq_s32_v:7895case NEON::BI__builtin_neon_vcvtpq_s64_v:7896case NEON::BI__builtin_neon_vcvtpq_u16_f16:7897case NEON::BI__builtin_neon_vcvtpq_u32_v:7898case NEON::BI__builtin_neon_vcvtpq_u64_v:7899case NEON::BI__builtin_neon_vcvtm_s16_f16:7900case NEON::BI__builtin_neon_vcvtm_s32_v:7901case 
NEON::BI__builtin_neon_vcvtm_s64_v:7902case NEON::BI__builtin_neon_vcvtm_u16_f16:7903case NEON::BI__builtin_neon_vcvtm_u32_v:7904case NEON::BI__builtin_neon_vcvtm_u64_v:7905case NEON::BI__builtin_neon_vcvtmq_s16_f16:7906case NEON::BI__builtin_neon_vcvtmq_s32_v:7907case NEON::BI__builtin_neon_vcvtmq_s64_v:7908case NEON::BI__builtin_neon_vcvtmq_u16_f16:7909case NEON::BI__builtin_neon_vcvtmq_u32_v:7910case NEON::BI__builtin_neon_vcvtmq_u64_v: {7911llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };7912return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);7913}7914case NEON::BI__builtin_neon_vcvtx_f32_v: {7915llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty};7916return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);79177918}7919case NEON::BI__builtin_neon_vext_v:7920case NEON::BI__builtin_neon_vextq_v: {7921int CV = cast<ConstantInt>(Ops[2])->getSExtValue();7922SmallVector<int, 16> Indices;7923for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)7924Indices.push_back(i+CV);79257926Ops[0] = Builder.CreateBitCast(Ops[0], Ty);7927Ops[1] = Builder.CreateBitCast(Ops[1], Ty);7928return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");7929}7930case NEON::BI__builtin_neon_vfma_v:7931case NEON::BI__builtin_neon_vfmaq_v: {7932Ops[0] = Builder.CreateBitCast(Ops[0], Ty);7933Ops[1] = Builder.CreateBitCast(Ops[1], Ty);7934Ops[2] = Builder.CreateBitCast(Ops[2], Ty);79357936// NEON intrinsic puts accumulator first, unlike the LLVM fma.7937return emitCallMaybeConstrainedFPBuiltin(7938*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,7939{Ops[1], Ops[2], Ops[0]});7940}7941case NEON::BI__builtin_neon_vld1_v:7942case NEON::BI__builtin_neon_vld1q_v: {7943llvm::Type *Tys[] = {Ty, Int8PtrTy};7944Ops.push_back(getAlignmentValue32(PtrOp0));7945return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");7946}7947case NEON::BI__builtin_neon_vld1_x2_v:7948case 
NEON::BI__builtin_neon_vld1q_x2_v:7949case NEON::BI__builtin_neon_vld1_x3_v:7950case NEON::BI__builtin_neon_vld1q_x3_v:7951case NEON::BI__builtin_neon_vld1_x4_v:7952case NEON::BI__builtin_neon_vld1q_x4_v: {7953llvm::Type *Tys[2] = {VTy, UnqualPtrTy};7954Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);7955Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");7956return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);7957}7958case NEON::BI__builtin_neon_vld2_v:7959case NEON::BI__builtin_neon_vld2q_v:7960case NEON::BI__builtin_neon_vld3_v:7961case NEON::BI__builtin_neon_vld3q_v:7962case NEON::BI__builtin_neon_vld4_v:7963case NEON::BI__builtin_neon_vld4q_v:7964case NEON::BI__builtin_neon_vld2_dup_v:7965case NEON::BI__builtin_neon_vld2q_dup_v:7966case NEON::BI__builtin_neon_vld3_dup_v:7967case NEON::BI__builtin_neon_vld3q_dup_v:7968case NEON::BI__builtin_neon_vld4_dup_v:7969case NEON::BI__builtin_neon_vld4q_dup_v: {7970llvm::Type *Tys[] = {Ty, Int8PtrTy};7971Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);7972Value *Align = getAlignmentValue32(PtrOp1);7973Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);7974return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);7975}7976case NEON::BI__builtin_neon_vld1_dup_v:7977case NEON::BI__builtin_neon_vld1q_dup_v: {7978Value *V = PoisonValue::get(Ty);7979PtrOp0 = PtrOp0.withElementType(VTy->getElementType());7980LoadInst *Ld = Builder.CreateLoad(PtrOp0);7981llvm::Constant *CI = ConstantInt::get(SizeTy, 0);7982Ops[0] = Builder.CreateInsertElement(V, Ld, CI);7983return EmitNeonSplat(Ops[0], CI);7984}7985case NEON::BI__builtin_neon_vld2_lane_v:7986case NEON::BI__builtin_neon_vld2q_lane_v:7987case NEON::BI__builtin_neon_vld3_lane_v:7988case NEON::BI__builtin_neon_vld3q_lane_v:7989case NEON::BI__builtin_neon_vld4_lane_v:7990case NEON::BI__builtin_neon_vld4q_lane_v: {7991llvm::Type *Tys[] = {Ty, Int8PtrTy};7992Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);7993for (unsigned I = 2; I < Ops.size() - 1; ++I)7994Ops[I] = 
Builder.CreateBitCast(Ops[I], Ty);7995Ops.push_back(getAlignmentValue32(PtrOp1));7996Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint);7997return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);7998}7999case NEON::BI__builtin_neon_vmovl_v: {8000llvm::FixedVectorType *DTy =8001llvm::FixedVectorType::getTruncatedElementVectorType(VTy);8002Ops[0] = Builder.CreateBitCast(Ops[0], DTy);8003if (Usgn)8004return Builder.CreateZExt(Ops[0], Ty, "vmovl");8005return Builder.CreateSExt(Ops[0], Ty, "vmovl");8006}8007case NEON::BI__builtin_neon_vmovn_v: {8008llvm::FixedVectorType *QTy =8009llvm::FixedVectorType::getExtendedElementVectorType(VTy);8010Ops[0] = Builder.CreateBitCast(Ops[0], QTy);8011return Builder.CreateTrunc(Ops[0], Ty, "vmovn");8012}8013case NEON::BI__builtin_neon_vmull_v:8014// FIXME: the integer vmull operations could be emitted in terms of pure8015// LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of8016// hoisting the exts outside loops. Until global ISel comes along that can8017// see through such movement this leads to bad CodeGen. So we need an8018// intrinsic for now.8019Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;8020Int = Type.isPoly() ? 
(unsigned)Intrinsic::arm_neon_vmullp : Int;8021return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");8022case NEON::BI__builtin_neon_vpadal_v:8023case NEON::BI__builtin_neon_vpadalq_v: {8024// The source operand type has twice as many elements of half the size.8025unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();8026llvm::Type *EltTy =8027llvm::IntegerType::get(getLLVMContext(), EltBits / 2);8028auto *NarrowTy =8029llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);8030llvm::Type *Tys[2] = { Ty, NarrowTy };8031return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);8032}8033case NEON::BI__builtin_neon_vpaddl_v:8034case NEON::BI__builtin_neon_vpaddlq_v: {8035// The source operand type has twice as many elements of half the size.8036unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();8037llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);8038auto *NarrowTy =8039llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);8040llvm::Type *Tys[2] = { Ty, NarrowTy };8041return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");8042}8043case NEON::BI__builtin_neon_vqdmlal_v:8044case NEON::BI__builtin_neon_vqdmlsl_v: {8045SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());8046Ops[1] =8047EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");8048Ops.resize(2);8049return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);8050}8051case NEON::BI__builtin_neon_vqdmulhq_lane_v:8052case NEON::BI__builtin_neon_vqdmulh_lane_v:8053case NEON::BI__builtin_neon_vqrdmulhq_lane_v:8054case NEON::BI__builtin_neon_vqrdmulh_lane_v: {8055auto *RTy = cast<llvm::FixedVectorType>(Ty);8056if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||8057BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)8058RTy = llvm::FixedVectorType::get(RTy->getElementType(),8059RTy->getNumElements() * 2);8060llvm::Type *Tys[2] = {8061RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), 
false,8062/*isQuad*/ false))};8063return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);8064}8065case NEON::BI__builtin_neon_vqdmulhq_laneq_v:8066case NEON::BI__builtin_neon_vqdmulh_laneq_v:8067case NEON::BI__builtin_neon_vqrdmulhq_laneq_v:8068case NEON::BI__builtin_neon_vqrdmulh_laneq_v: {8069llvm::Type *Tys[2] = {8070Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,8071/*isQuad*/ true))};8072return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);8073}8074case NEON::BI__builtin_neon_vqshl_n_v:8075case NEON::BI__builtin_neon_vqshlq_n_v:8076return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",80771, false);8078case NEON::BI__builtin_neon_vqshlu_n_v:8079case NEON::BI__builtin_neon_vqshluq_n_v:8080return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",80811, false);8082case NEON::BI__builtin_neon_vrecpe_v:8083case NEON::BI__builtin_neon_vrecpeq_v:8084case NEON::BI__builtin_neon_vrsqrte_v:8085case NEON::BI__builtin_neon_vrsqrteq_v:8086Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;8087return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);8088case NEON::BI__builtin_neon_vrndi_v:8089case NEON::BI__builtin_neon_vrndiq_v:8090Int = Builder.getIsFPConstrained()8091? 
Intrinsic::experimental_constrained_nearbyint8092: Intrinsic::nearbyint;8093return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);8094case NEON::BI__builtin_neon_vrshr_n_v:8095case NEON::BI__builtin_neon_vrshrq_n_v:8096return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",80971, true);8098case NEON::BI__builtin_neon_vsha512hq_u64:8099case NEON::BI__builtin_neon_vsha512h2q_u64:8100case NEON::BI__builtin_neon_vsha512su0q_u64:8101case NEON::BI__builtin_neon_vsha512su1q_u64: {8102Function *F = CGM.getIntrinsic(Int);8103return EmitNeonCall(F, Ops, "");8104}8105case NEON::BI__builtin_neon_vshl_n_v:8106case NEON::BI__builtin_neon_vshlq_n_v:8107Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);8108return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],8109"vshl_n");8110case NEON::BI__builtin_neon_vshll_n_v: {8111llvm::FixedVectorType *SrcTy =8112llvm::FixedVectorType::getTruncatedElementVectorType(VTy);8113Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);8114if (Usgn)8115Ops[0] = Builder.CreateZExt(Ops[0], VTy);8116else8117Ops[0] = Builder.CreateSExt(Ops[0], VTy);8118Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);8119return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");8120}8121case NEON::BI__builtin_neon_vshrn_n_v: {8122llvm::FixedVectorType *SrcTy =8123llvm::FixedVectorType::getExtendedElementVectorType(VTy);8124Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);8125Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);8126if (Usgn)8127Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);8128else8129Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);8130return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");8131}8132case NEON::BI__builtin_neon_vshr_n_v:8133case NEON::BI__builtin_neon_vshrq_n_v:8134return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");8135case NEON::BI__builtin_neon_vst1_v:8136case NEON::BI__builtin_neon_vst1q_v:8137case NEON::BI__builtin_neon_vst2_v:8138case NEON::BI__builtin_neon_vst2q_v:8139case NEON::BI__builtin_neon_vst3_v:8140case 
NEON::BI__builtin_neon_vst3q_v:8141case NEON::BI__builtin_neon_vst4_v:8142case NEON::BI__builtin_neon_vst4q_v:8143case NEON::BI__builtin_neon_vst2_lane_v:8144case NEON::BI__builtin_neon_vst2q_lane_v:8145case NEON::BI__builtin_neon_vst3_lane_v:8146case NEON::BI__builtin_neon_vst3q_lane_v:8147case NEON::BI__builtin_neon_vst4_lane_v:8148case NEON::BI__builtin_neon_vst4q_lane_v: {8149llvm::Type *Tys[] = {Int8PtrTy, Ty};8150Ops.push_back(getAlignmentValue32(PtrOp0));8151return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");8152}8153case NEON::BI__builtin_neon_vsm3partw1q_u32:8154case NEON::BI__builtin_neon_vsm3partw2q_u32:8155case NEON::BI__builtin_neon_vsm3ss1q_u32:8156case NEON::BI__builtin_neon_vsm4ekeyq_u32:8157case NEON::BI__builtin_neon_vsm4eq_u32: {8158Function *F = CGM.getIntrinsic(Int);8159return EmitNeonCall(F, Ops, "");8160}8161case NEON::BI__builtin_neon_vsm3tt1aq_u32:8162case NEON::BI__builtin_neon_vsm3tt1bq_u32:8163case NEON::BI__builtin_neon_vsm3tt2aq_u32:8164case NEON::BI__builtin_neon_vsm3tt2bq_u32: {8165Function *F = CGM.getIntrinsic(Int);8166Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);8167return EmitNeonCall(F, Ops, "");8168}8169case NEON::BI__builtin_neon_vst1_x2_v:8170case NEON::BI__builtin_neon_vst1q_x2_v:8171case NEON::BI__builtin_neon_vst1_x3_v:8172case NEON::BI__builtin_neon_vst1q_x3_v:8173case NEON::BI__builtin_neon_vst1_x4_v:8174case NEON::BI__builtin_neon_vst1q_x4_v: {8175// TODO: Currently in AArch32 mode the pointer operand comes first, whereas8176// in AArch64 it comes last. 
We may want to stick to one or another.8177if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be ||8178Arch == llvm::Triple::aarch64_32) {8179llvm::Type *Tys[2] = {VTy, UnqualPtrTy};8180std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());8181return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");8182}8183llvm::Type *Tys[2] = {UnqualPtrTy, VTy};8184return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");8185}8186case NEON::BI__builtin_neon_vsubhn_v: {8187llvm::FixedVectorType *SrcTy =8188llvm::FixedVectorType::getExtendedElementVectorType(VTy);81898190// %sum = add <4 x i32> %lhs, %rhs8191Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);8192Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);8193Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");81948195// %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>8196Constant *ShiftAmt =8197ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);8198Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");81998200// %res = trunc <4 x i32> %high to <4 x i16>8201return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");8202}8203case NEON::BI__builtin_neon_vtrn_v:8204case NEON::BI__builtin_neon_vtrnq_v: {8205Ops[1] = Builder.CreateBitCast(Ops[1], Ty);8206Ops[2] = Builder.CreateBitCast(Ops[2], Ty);8207Value *SV = nullptr;82088209for (unsigned vi = 0; vi != 2; ++vi) {8210SmallVector<int, 16> Indices;8211for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {8212Indices.push_back(i+vi);8213Indices.push_back(i+e+vi);8214}8215Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);8216SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");8217SV = Builder.CreateDefaultAlignedStore(SV, Addr);8218}8219return SV;8220}8221case NEON::BI__builtin_neon_vtst_v:8222case NEON::BI__builtin_neon_vtstq_v: {8223Ops[0] = Builder.CreateBitCast(Ops[0], Ty);8224Ops[1] = Builder.CreateBitCast(Ops[1], Ty);8225Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);8226Ops[0] = 
Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],8227ConstantAggregateZero::get(Ty));8228return Builder.CreateSExt(Ops[0], Ty, "vtst");8229}8230case NEON::BI__builtin_neon_vuzp_v:8231case NEON::BI__builtin_neon_vuzpq_v: {8232Ops[1] = Builder.CreateBitCast(Ops[1], Ty);8233Ops[2] = Builder.CreateBitCast(Ops[2], Ty);8234Value *SV = nullptr;82358236for (unsigned vi = 0; vi != 2; ++vi) {8237SmallVector<int, 16> Indices;8238for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)8239Indices.push_back(2*i+vi);82408241Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);8242SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");8243SV = Builder.CreateDefaultAlignedStore(SV, Addr);8244}8245return SV;8246}8247case NEON::BI__builtin_neon_vxarq_u64: {8248Function *F = CGM.getIntrinsic(Int);8249Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);8250return EmitNeonCall(F, Ops, "");8251}8252case NEON::BI__builtin_neon_vzip_v:8253case NEON::BI__builtin_neon_vzipq_v: {8254Ops[1] = Builder.CreateBitCast(Ops[1], Ty);8255Ops[2] = Builder.CreateBitCast(Ops[2], Ty);8256Value *SV = nullptr;82578258for (unsigned vi = 0; vi != 2; ++vi) {8259SmallVector<int, 16> Indices;8260for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {8261Indices.push_back((i + vi*e) >> 1);8262Indices.push_back(((i + vi*e) >> 1)+e);8263}8264Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);8265SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");8266SV = Builder.CreateDefaultAlignedStore(SV, Addr);8267}8268return SV;8269}8270case NEON::BI__builtin_neon_vdot_s32:8271case NEON::BI__builtin_neon_vdot_u32:8272case NEON::BI__builtin_neon_vdotq_s32:8273case NEON::BI__builtin_neon_vdotq_u32: {8274auto *InputTy =8275llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);8276llvm::Type *Tys[2] = { Ty, InputTy };8277return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");8278}8279case NEON::BI__builtin_neon_vfmlal_low_f16:8280case 
NEON::BI__builtin_neon_vfmlalq_low_f16: {8281auto *InputTy =8282llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);8283llvm::Type *Tys[2] = { Ty, InputTy };8284return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");8285}8286case NEON::BI__builtin_neon_vfmlsl_low_f16:8287case NEON::BI__builtin_neon_vfmlslq_low_f16: {8288auto *InputTy =8289llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);8290llvm::Type *Tys[2] = { Ty, InputTy };8291return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");8292}8293case NEON::BI__builtin_neon_vfmlal_high_f16:8294case NEON::BI__builtin_neon_vfmlalq_high_f16: {8295auto *InputTy =8296llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);8297llvm::Type *Tys[2] = { Ty, InputTy };8298return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");8299}8300case NEON::BI__builtin_neon_vfmlsl_high_f16:8301case NEON::BI__builtin_neon_vfmlslq_high_f16: {8302auto *InputTy =8303llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);8304llvm::Type *Tys[2] = { Ty, InputTy };8305return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");8306}8307case NEON::BI__builtin_neon_vmmlaq_s32:8308case NEON::BI__builtin_neon_vmmlaq_u32: {8309auto *InputTy =8310llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);8311llvm::Type *Tys[2] = { Ty, InputTy };8312return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla");8313}8314case NEON::BI__builtin_neon_vusmmlaq_s32: {8315auto *InputTy =8316llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);8317llvm::Type *Tys[2] = { Ty, InputTy };8318return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");8319}8320case NEON::BI__builtin_neon_vusdot_s32:8321case NEON::BI__builtin_neon_vusdotq_s32: {8322auto *InputTy =8323llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);8324llvm::Type *Tys[2] = { Ty, InputTy };8325return 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");8326}8327case NEON::BI__builtin_neon_vbfdot_f32:8328case NEON::BI__builtin_neon_vbfdotq_f32: {8329llvm::Type *InputTy =8330llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16);8331llvm::Type *Tys[2] = { Ty, InputTy };8332return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot");8333}8334case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: {8335llvm::Type *Tys[1] = { Ty };8336Function *F = CGM.getIntrinsic(Int, Tys);8337return EmitNeonCall(F, Ops, "vcvtfp2bf");8338}83398340}83418342assert(Int && "Expected valid intrinsic number");83438344// Determine the type(s) of this overloaded AArch64 intrinsic.8345Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);83468347Value *Result = EmitNeonCall(F, Ops, NameHint);8348llvm::Type *ResultType = ConvertType(E->getType());8349// AArch64 intrinsic one-element vector type cast to8350// scalar type expected by the builtin8351return Builder.CreateBitCast(Result, ResultType, NameHint);8352}83538354Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(8355Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,8356const CmpInst::Predicate Ip, const Twine &Name) {8357llvm::Type *OTy = Op->getType();83588359// FIXME: this is utterly horrific. We should not be looking at previous8360// codegen context to find out what needs doing. 
Unfortunately TableGen8361// currently gives us exactly the same calls for vceqz_f32 and vceqz_s328362// (etc).8363if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))8364OTy = BI->getOperand(0)->getType();83658366Op = Builder.CreateBitCast(Op, OTy);8367if (OTy->getScalarType()->isFloatingPointTy()) {8368if (Fp == CmpInst::FCMP_OEQ)8369Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));8370else8371Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy));8372} else {8373Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));8374}8375return Builder.CreateSExt(Op, Ty, Name);8376}83778378static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,8379Value *ExtOp, Value *IndexOp,8380llvm::Type *ResTy, unsigned IntID,8381const char *Name) {8382SmallVector<Value *, 2> TblOps;8383if (ExtOp)8384TblOps.push_back(ExtOp);83858386// Build a vector containing sequential number like (0, 1, 2, ..., 15)8387SmallVector<int, 16> Indices;8388auto *TblTy = cast<llvm::FixedVectorType>(Ops[0]->getType());8389for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {8390Indices.push_back(2*i);8391Indices.push_back(2*i+1);8392}83938394int PairPos = 0, End = Ops.size() - 1;8395while (PairPos < End) {8396TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],8397Ops[PairPos+1], Indices,8398Name));8399PairPos += 2;8400}84018402// If there's an odd number of 64-bit lookup table, fill the high 64-bit8403// of the 128-bit lookup table with zero.8404if (PairPos == End) {8405Value *ZeroTbl = ConstantAggregateZero::get(TblTy);8406TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],8407ZeroTbl, Indices, Name));8408}84098410Function *TblF;8411TblOps.push_back(IndexOp);8412TblF = CGF.CGM.getIntrinsic(IntID, ResTy);84138414return CGF.EmitNeonCall(TblF, TblOps, Name);8415}84168417Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {8418unsigned Value;8419switch (BuiltinID) {8420default:8421return nullptr;8422case 
clang::ARM::BI__builtin_arm_nop:8423Value = 0;8424break;8425case clang::ARM::BI__builtin_arm_yield:8426case clang::ARM::BI__yield:8427Value = 1;8428break;8429case clang::ARM::BI__builtin_arm_wfe:8430case clang::ARM::BI__wfe:8431Value = 2;8432break;8433case clang::ARM::BI__builtin_arm_wfi:8434case clang::ARM::BI__wfi:8435Value = 3;8436break;8437case clang::ARM::BI__builtin_arm_sev:8438case clang::ARM::BI__sev:8439Value = 4;8440break;8441case clang::ARM::BI__builtin_arm_sevl:8442case clang::ARM::BI__sevl:8443Value = 5;8444break;8445}84468447return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),8448llvm::ConstantInt::get(Int32Ty, Value));8449}84508451enum SpecialRegisterAccessKind {8452NormalRead,8453VolatileRead,8454Write,8455};84568457// Generates the IR for __builtin_read_exec_*.8458// Lowers the builtin to amdgcn_ballot intrinsic.8459static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E,8460llvm::Type *RegisterType,8461llvm::Type *ValueType, bool isExecHi) {8462CodeGen::CGBuilderTy &Builder = CGF.Builder;8463CodeGen::CodeGenModule &CGM = CGF.CGM;84648465Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType});8466llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)});84678468if (isExecHi) {8469Value *Rt2 = Builder.CreateLShr(Call, 32);8470Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty);8471return Rt2;8472}84738474return Call;8475}84768477// Generates the IR for the read/write special register builtin,8478// ValueType is the type of the value that is to be written or read,8479// RegisterType is the type of the register being written to or read from.8480static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,8481const CallExpr *E,8482llvm::Type *RegisterType,8483llvm::Type *ValueType,8484SpecialRegisterAccessKind AccessKind,8485StringRef SysReg = "") {8486// write and register intrinsics only support 32, 64 and 128 bit operations.8487assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) 
||8488RegisterType->isIntegerTy(128)) &&8489"Unsupported size for register.");84908491CodeGen::CGBuilderTy &Builder = CGF.Builder;8492CodeGen::CodeGenModule &CGM = CGF.CGM;8493LLVMContext &Context = CGM.getLLVMContext();84948495if (SysReg.empty()) {8496const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();8497SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();8498}84998500llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };8501llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);8502llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);85038504llvm::Type *Types[] = { RegisterType };85058506bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);8507assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))8508&& "Can't fit 64-bit value in 32-bit register");85098510if (AccessKind != Write) {8511assert(AccessKind == NormalRead || AccessKind == VolatileRead);8512llvm::Function *F = CGM.getIntrinsic(8513AccessKind == VolatileRead ? 
llvm::Intrinsic::read_volatile_register8514: llvm::Intrinsic::read_register,8515Types);8516llvm::Value *Call = Builder.CreateCall(F, Metadata);85178518if (MixedTypes)8519// Read into 64 bit register and then truncate result to 32 bit.8520return Builder.CreateTrunc(Call, ValueType);85218522if (ValueType->isPointerTy())8523// Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).8524return Builder.CreateIntToPtr(Call, ValueType);85258526return Call;8527}85288529llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);8530llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));8531if (MixedTypes) {8532// Extend 32 bit write value to 64 bit to pass to write.8533ArgValue = Builder.CreateZExt(ArgValue, RegisterType);8534return Builder.CreateCall(F, { Metadata, ArgValue });8535}85368537if (ValueType->isPointerTy()) {8538// Have VoidPtrTy ArgValue but want to return an i32/i64.8539ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);8540return Builder.CreateCall(F, { Metadata, ArgValue });8541}85428543return Builder.CreateCall(F, { Metadata, ArgValue });8544}85458546/// Return true if BuiltinID is an overloaded Neon intrinsic with an extra8547/// argument that specifies the vector type.8548static bool HasExtraNeonArgument(unsigned BuiltinID) {8549switch (BuiltinID) {8550default: break;8551case NEON::BI__builtin_neon_vget_lane_i8:8552case NEON::BI__builtin_neon_vget_lane_i16:8553case NEON::BI__builtin_neon_vget_lane_bf16:8554case NEON::BI__builtin_neon_vget_lane_i32:8555case NEON::BI__builtin_neon_vget_lane_i64:8556case NEON::BI__builtin_neon_vget_lane_f32:8557case NEON::BI__builtin_neon_vgetq_lane_i8:8558case NEON::BI__builtin_neon_vgetq_lane_i16:8559case NEON::BI__builtin_neon_vgetq_lane_bf16:8560case NEON::BI__builtin_neon_vgetq_lane_i32:8561case NEON::BI__builtin_neon_vgetq_lane_i64:8562case NEON::BI__builtin_neon_vgetq_lane_f32:8563case NEON::BI__builtin_neon_vduph_lane_bf16:8564case NEON::BI__builtin_neon_vduph_laneq_bf16:8565case 
NEON::BI__builtin_neon_vset_lane_i8:8566case NEON::BI__builtin_neon_vset_lane_i16:8567case NEON::BI__builtin_neon_vset_lane_bf16:8568case NEON::BI__builtin_neon_vset_lane_i32:8569case NEON::BI__builtin_neon_vset_lane_i64:8570case NEON::BI__builtin_neon_vset_lane_f32:8571case NEON::BI__builtin_neon_vsetq_lane_i8:8572case NEON::BI__builtin_neon_vsetq_lane_i16:8573case NEON::BI__builtin_neon_vsetq_lane_bf16:8574case NEON::BI__builtin_neon_vsetq_lane_i32:8575case NEON::BI__builtin_neon_vsetq_lane_i64:8576case NEON::BI__builtin_neon_vsetq_lane_f32:8577case NEON::BI__builtin_neon_vsha1h_u32:8578case NEON::BI__builtin_neon_vsha1cq_u32:8579case NEON::BI__builtin_neon_vsha1pq_u32:8580case NEON::BI__builtin_neon_vsha1mq_u32:8581case NEON::BI__builtin_neon_vcvth_bf16_f32:8582case clang::ARM::BI_MoveToCoprocessor:8583case clang::ARM::BI_MoveToCoprocessor2:8584return false;8585}8586return true;8587}85888589Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,8590const CallExpr *E,8591ReturnValueSlot ReturnValue,8592llvm::Triple::ArchType Arch) {8593if (auto Hint = GetValueForARMHint(BuiltinID))8594return Hint;85958596if (BuiltinID == clang::ARM::BI__emit) {8597bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;8598llvm::FunctionType *FTy =8599llvm::FunctionType::get(VoidTy, /*Variadic=*/false);86008601Expr::EvalResult Result;8602if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))8603llvm_unreachable("Sema will ensure that the parameter is constant");86048605llvm::APSInt Value = Result.Val.getInt();8606uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();86078608llvm::InlineAsm *Emit =8609IsThumb ? 
InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",8610/*hasSideEffects=*/true)8611: InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",8612/*hasSideEffects=*/true);86138614return Builder.CreateCall(Emit);8615}86168617if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) {8618Value *Option = EmitScalarExpr(E->getArg(0));8619return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);8620}86218622if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) {8623Value *Address = EmitScalarExpr(E->getArg(0));8624Value *RW = EmitScalarExpr(E->getArg(1));8625Value *IsData = EmitScalarExpr(E->getArg(2));86268627// Locality is not supported on ARM target8628Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);86298630Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());8631return Builder.CreateCall(F, {Address, RW, Locality, IsData});8632}86338634if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) {8635llvm::Value *Arg = EmitScalarExpr(E->getArg(0));8636return Builder.CreateCall(8637CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");8638}86398640if (BuiltinID == clang::ARM::BI__builtin_arm_clz ||8641BuiltinID == clang::ARM::BI__builtin_arm_clz64) {8642llvm::Value *Arg = EmitScalarExpr(E->getArg(0));8643Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());8644Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});8645if (BuiltinID == clang::ARM::BI__builtin_arm_clz64)8646Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());8647return Res;8648}864986508651if (BuiltinID == clang::ARM::BI__builtin_arm_cls) {8652llvm::Value *Arg = EmitScalarExpr(E->getArg(0));8653return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls");8654}8655if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) {8656llvm::Value *Arg = EmitScalarExpr(E->getArg(0));8657return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg,8658"cls");8659}86608661if (BuiltinID == clang::ARM::BI__clear_cache) 
{8662assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");8663const FunctionDecl *FD = E->getDirectCallee();8664Value *Ops[2];8665for (unsigned i = 0; i < 2; i++)8666Ops[i] = EmitScalarExpr(E->getArg(i));8667llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());8668llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);8669StringRef Name = FD->getName();8670return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);8671}86728673if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr ||8674BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) {8675Function *F;86768677switch (BuiltinID) {8678default: llvm_unreachable("unexpected builtin");8679case clang::ARM::BI__builtin_arm_mcrr:8680F = CGM.getIntrinsic(Intrinsic::arm_mcrr);8681break;8682case clang::ARM::BI__builtin_arm_mcrr2:8683F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);8684break;8685}86868687// MCRR{2} instruction has 5 operands but8688// the intrinsic has 4 because Rt and Rt28689// are represented as a single unsigned 648690// bit integer in the intrinsic definition8691// but internally it's represented as 2 328692// bit integers.86938694Value *Coproc = EmitScalarExpr(E->getArg(0));8695Value *Opc1 = EmitScalarExpr(E->getArg(1));8696Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));8697Value *CRm = EmitScalarExpr(E->getArg(3));86988699Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);8700Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);8701Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);8702Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);87038704return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});8705}87068707if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc ||8708BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) {8709Function *F;87108711switch (BuiltinID) {8712default: llvm_unreachable("unexpected builtin");8713case clang::ARM::BI__builtin_arm_mrrc:8714F = CGM.getIntrinsic(Intrinsic::arm_mrrc);8715break;8716case clang::ARM::BI__builtin_arm_mrrc2:8717F = 
CGM.getIntrinsic(Intrinsic::arm_mrrc2);8718break;8719}87208721Value *Coproc = EmitScalarExpr(E->getArg(0));8722Value *Opc1 = EmitScalarExpr(E->getArg(1));8723Value *CRm = EmitScalarExpr(E->getArg(2));8724Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});87258726// Returns an unsigned 64 bit integer, represented8727// as two 32 bit integers.87288729Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);8730Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);8731Rt = Builder.CreateZExt(Rt, Int64Ty);8732Rt1 = Builder.CreateZExt(Rt1, Int64Ty);87338734Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);8735RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);8736RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);87378738return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));8739}87408741if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd ||8742((BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||8743BuiltinID == clang::ARM::BI__builtin_arm_ldaex) &&8744getContext().getTypeSize(E->getType()) == 64) ||8745BuiltinID == clang::ARM::BI__ldrexd) {8746Function *F;87478748switch (BuiltinID) {8749default: llvm_unreachable("unexpected builtin");8750case clang::ARM::BI__builtin_arm_ldaex:8751F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);8752break;8753case clang::ARM::BI__builtin_arm_ldrexd:8754case clang::ARM::BI__builtin_arm_ldrex:8755case clang::ARM::BI__ldrexd:8756F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);8757break;8758}87598760Value *LdPtr = EmitScalarExpr(E->getArg(0));8761Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");87628763Value *Val0 = Builder.CreateExtractValue(Val, 1);8764Value *Val1 = Builder.CreateExtractValue(Val, 0);8765Val0 = Builder.CreateZExt(Val0, Int64Ty);8766Val1 = Builder.CreateZExt(Val1, Int64Ty);87678768Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);8769Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);8770Val = Builder.CreateOr(Val, Val1);8771return Builder.CreateBitCast(Val, 
ConvertType(E->getType()));8772}87738774if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex ||8775BuiltinID == clang::ARM::BI__builtin_arm_ldaex) {8776Value *LoadAddr = EmitScalarExpr(E->getArg(0));87778778QualType Ty = E->getType();8779llvm::Type *RealResTy = ConvertType(Ty);8780llvm::Type *IntTy =8781llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));87828783Function *F = CGM.getIntrinsic(8784BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? Intrinsic::arm_ldaex8785: Intrinsic::arm_ldrex,8786UnqualPtrTy);8787CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex");8788Val->addParamAttr(87890, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));87908791if (RealResTy->isPointerTy())8792return Builder.CreateIntToPtr(Val, RealResTy);8793else {8794llvm::Type *IntResTy = llvm::IntegerType::get(8795getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));8796return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),8797RealResTy);8798}8799}88008801if (BuiltinID == clang::ARM::BI__builtin_arm_strexd ||8802((BuiltinID == clang::ARM::BI__builtin_arm_stlex ||8803BuiltinID == clang::ARM::BI__builtin_arm_strex) &&8804getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {8805Function *F = CGM.getIntrinsic(8806BuiltinID == clang::ARM::BI__builtin_arm_stlex ? 
Intrinsic::arm_stlexd8807: Intrinsic::arm_strexd);8808llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);88098810Address Tmp = CreateMemTemp(E->getArg(0)->getType());8811Value *Val = EmitScalarExpr(E->getArg(0));8812Builder.CreateStore(Val, Tmp);88138814Address LdPtr = Tmp.withElementType(STy);8815Val = Builder.CreateLoad(LdPtr);88168817Value *Arg0 = Builder.CreateExtractValue(Val, 0);8818Value *Arg1 = Builder.CreateExtractValue(Val, 1);8819Value *StPtr = EmitScalarExpr(E->getArg(1));8820return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");8821}88228823if (BuiltinID == clang::ARM::BI__builtin_arm_strex ||8824BuiltinID == clang::ARM::BI__builtin_arm_stlex) {8825Value *StoreVal = EmitScalarExpr(E->getArg(0));8826Value *StoreAddr = EmitScalarExpr(E->getArg(1));88278828QualType Ty = E->getArg(0)->getType();8829llvm::Type *StoreTy =8830llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));88318832if (StoreVal->getType()->isPointerTy())8833StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);8834else {8835llvm::Type *IntTy = llvm::IntegerType::get(8836getLLVMContext(),8837CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));8838StoreVal = Builder.CreateBitCast(StoreVal, IntTy);8839StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);8840}88418842Function *F = CGM.getIntrinsic(8843BuiltinID == clang::ARM::BI__builtin_arm_stlex ? 
Intrinsic::arm_stlex8844: Intrinsic::arm_strex,8845StoreAddr->getType());88468847CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");8848CI->addParamAttr(88491, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));8850return CI;8851}88528853if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) {8854Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);8855return Builder.CreateCall(F);8856}88578858// CRC328859Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;8860switch (BuiltinID) {8861case clang::ARM::BI__builtin_arm_crc32b:8862CRCIntrinsicID = Intrinsic::arm_crc32b; break;8863case clang::ARM::BI__builtin_arm_crc32cb:8864CRCIntrinsicID = Intrinsic::arm_crc32cb; break;8865case clang::ARM::BI__builtin_arm_crc32h:8866CRCIntrinsicID = Intrinsic::arm_crc32h; break;8867case clang::ARM::BI__builtin_arm_crc32ch:8868CRCIntrinsicID = Intrinsic::arm_crc32ch; break;8869case clang::ARM::BI__builtin_arm_crc32w:8870case clang::ARM::BI__builtin_arm_crc32d:8871CRCIntrinsicID = Intrinsic::arm_crc32w; break;8872case clang::ARM::BI__builtin_arm_crc32cw:8873case clang::ARM::BI__builtin_arm_crc32cd:8874CRCIntrinsicID = Intrinsic::arm_crc32cw; break;8875}88768877if (CRCIntrinsicID != Intrinsic::not_intrinsic) {8878Value *Arg0 = EmitScalarExpr(E->getArg(0));8879Value *Arg1 = EmitScalarExpr(E->getArg(1));88808881// crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w8882// intrinsics, hence we need different codegen for these cases.8883if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d ||8884BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) {8885Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);8886Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);8887Value *Arg1b = Builder.CreateLShr(Arg1, C1);8888Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);88898890Function *F = CGM.getIntrinsic(CRCIntrinsicID);8891Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});8892return Builder.CreateCall(F, {Res, Arg1b});8893} else {8894Arg1 = 
Builder.CreateZExtOrBitCast(Arg1, Int32Ty);88958896Function *F = CGM.getIntrinsic(CRCIntrinsicID);8897return Builder.CreateCall(F, {Arg0, Arg1});8898}8899}89008901if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||8902BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||8903BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||8904BuiltinID == clang::ARM::BI__builtin_arm_wsr ||8905BuiltinID == clang::ARM::BI__builtin_arm_wsr64 ||8906BuiltinID == clang::ARM::BI__builtin_arm_wsrp) {89078908SpecialRegisterAccessKind AccessKind = Write;8909if (BuiltinID == clang::ARM::BI__builtin_arm_rsr ||8910BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||8911BuiltinID == clang::ARM::BI__builtin_arm_rsrp)8912AccessKind = VolatileRead;89138914bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp ||8915BuiltinID == clang::ARM::BI__builtin_arm_wsrp;89168917bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 ||8918BuiltinID == clang::ARM::BI__builtin_arm_wsr64;89198920llvm::Type *ValueType;8921llvm::Type *RegisterType;8922if (IsPointerBuiltin) {8923ValueType = VoidPtrTy;8924RegisterType = Int32Ty;8925} else if (Is64Bit) {8926ValueType = RegisterType = Int64Ty;8927} else {8928ValueType = RegisterType = Int32Ty;8929}89308931return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,8932AccessKind);8933}89348935if (BuiltinID == ARM::BI__builtin_sponentry) {8936llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);8937return Builder.CreateCall(F);8938}89398940// Handle MSVC intrinsics before argument evaluation to prevent double8941// evaluation.8942if (std::optional<MSVCIntrin> MsvcIntId = translateArmToMsvcIntrin(BuiltinID))8943return EmitMSVCBuiltinExpr(*MsvcIntId, E);89448945// Deal with MVE builtins8946if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))8947return Result;8948// Handle CDE builtins8949if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))8950return Result;89518952// Some 
intrinsics are equivalent - if they are use the base intrinsic ID.8953auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {8954return P.first == BuiltinID;8955});8956if (It != end(NEONEquivalentIntrinsicMap))8957BuiltinID = It->second;89588959// Find out if any arguments are required to be integer constant8960// expressions.8961unsigned ICEArguments = 0;8962ASTContext::GetBuiltinTypeError Error;8963getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);8964assert(Error == ASTContext::GE_None && "Should not codegen an error");89658966auto getAlignmentValue32 = [&](Address addr) -> Value* {8967return Builder.getInt32(addr.getAlignment().getQuantity());8968};89698970Address PtrOp0 = Address::invalid();8971Address PtrOp1 = Address::invalid();8972SmallVector<Value*, 4> Ops;8973bool HasExtraArg = HasExtraNeonArgument(BuiltinID);8974unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);8975for (unsigned i = 0, e = NumArgs; i != e; i++) {8976if (i == 0) {8977switch (BuiltinID) {8978case NEON::BI__builtin_neon_vld1_v:8979case NEON::BI__builtin_neon_vld1q_v:8980case NEON::BI__builtin_neon_vld1q_lane_v:8981case NEON::BI__builtin_neon_vld1_lane_v:8982case NEON::BI__builtin_neon_vld1_dup_v:8983case NEON::BI__builtin_neon_vld1q_dup_v:8984case NEON::BI__builtin_neon_vst1_v:8985case NEON::BI__builtin_neon_vst1q_v:8986case NEON::BI__builtin_neon_vst1q_lane_v:8987case NEON::BI__builtin_neon_vst1_lane_v:8988case NEON::BI__builtin_neon_vst2_v:8989case NEON::BI__builtin_neon_vst2q_v:8990case NEON::BI__builtin_neon_vst2_lane_v:8991case NEON::BI__builtin_neon_vst2q_lane_v:8992case NEON::BI__builtin_neon_vst3_v:8993case NEON::BI__builtin_neon_vst3q_v:8994case NEON::BI__builtin_neon_vst3_lane_v:8995case NEON::BI__builtin_neon_vst3q_lane_v:8996case NEON::BI__builtin_neon_vst4_v:8997case NEON::BI__builtin_neon_vst4q_v:8998case NEON::BI__builtin_neon_vst4_lane_v:8999case NEON::BI__builtin_neon_vst4q_lane_v:9000// Get the alignment for the argument in addition 
to the value;9001// we'll use it later.9002PtrOp0 = EmitPointerWithAlignment(E->getArg(0));9003Ops.push_back(PtrOp0.emitRawPointer(*this));9004continue;9005}9006}9007if (i == 1) {9008switch (BuiltinID) {9009case NEON::BI__builtin_neon_vld2_v:9010case NEON::BI__builtin_neon_vld2q_v:9011case NEON::BI__builtin_neon_vld3_v:9012case NEON::BI__builtin_neon_vld3q_v:9013case NEON::BI__builtin_neon_vld4_v:9014case NEON::BI__builtin_neon_vld4q_v:9015case NEON::BI__builtin_neon_vld2_lane_v:9016case NEON::BI__builtin_neon_vld2q_lane_v:9017case NEON::BI__builtin_neon_vld3_lane_v:9018case NEON::BI__builtin_neon_vld3q_lane_v:9019case NEON::BI__builtin_neon_vld4_lane_v:9020case NEON::BI__builtin_neon_vld4q_lane_v:9021case NEON::BI__builtin_neon_vld2_dup_v:9022case NEON::BI__builtin_neon_vld2q_dup_v:9023case NEON::BI__builtin_neon_vld3_dup_v:9024case NEON::BI__builtin_neon_vld3q_dup_v:9025case NEON::BI__builtin_neon_vld4_dup_v:9026case NEON::BI__builtin_neon_vld4q_dup_v:9027// Get the alignment for the argument in addition to the value;9028// we'll use it later.9029PtrOp1 = EmitPointerWithAlignment(E->getArg(1));9030Ops.push_back(PtrOp1.emitRawPointer(*this));9031continue;9032}9033}90349035Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));9036}90379038switch (BuiltinID) {9039default: break;90409041case NEON::BI__builtin_neon_vget_lane_i8:9042case NEON::BI__builtin_neon_vget_lane_i16:9043case NEON::BI__builtin_neon_vget_lane_i32:9044case NEON::BI__builtin_neon_vget_lane_i64:9045case NEON::BI__builtin_neon_vget_lane_bf16:9046case NEON::BI__builtin_neon_vget_lane_f32:9047case NEON::BI__builtin_neon_vgetq_lane_i8:9048case NEON::BI__builtin_neon_vgetq_lane_i16:9049case NEON::BI__builtin_neon_vgetq_lane_i32:9050case NEON::BI__builtin_neon_vgetq_lane_i64:9051case NEON::BI__builtin_neon_vgetq_lane_bf16:9052case NEON::BI__builtin_neon_vgetq_lane_f32:9053case NEON::BI__builtin_neon_vduph_lane_bf16:9054case NEON::BI__builtin_neon_vduph_laneq_bf16:9055return 
Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");90569057case NEON::BI__builtin_neon_vrndns_f32: {9058Value *Arg = EmitScalarExpr(E->getArg(0));9059llvm::Type *Tys[] = {Arg->getType()};9060Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);9061return Builder.CreateCall(F, {Arg}, "vrndn"); }90629063case NEON::BI__builtin_neon_vset_lane_i8:9064case NEON::BI__builtin_neon_vset_lane_i16:9065case NEON::BI__builtin_neon_vset_lane_i32:9066case NEON::BI__builtin_neon_vset_lane_i64:9067case NEON::BI__builtin_neon_vset_lane_bf16:9068case NEON::BI__builtin_neon_vset_lane_f32:9069case NEON::BI__builtin_neon_vsetq_lane_i8:9070case NEON::BI__builtin_neon_vsetq_lane_i16:9071case NEON::BI__builtin_neon_vsetq_lane_i32:9072case NEON::BI__builtin_neon_vsetq_lane_i64:9073case NEON::BI__builtin_neon_vsetq_lane_bf16:9074case NEON::BI__builtin_neon_vsetq_lane_f32:9075return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");90769077case NEON::BI__builtin_neon_vsha1h_u32:9078return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,9079"vsha1h");9080case NEON::BI__builtin_neon_vsha1cq_u32:9081return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,9082"vsha1h");9083case NEON::BI__builtin_neon_vsha1pq_u32:9084return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,9085"vsha1h");9086case NEON::BI__builtin_neon_vsha1mq_u32:9087return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,9088"vsha1h");90899090case NEON::BI__builtin_neon_vcvth_bf16_f32: {9091return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops,9092"vcvtbfp2bf");9093}90949095// The ARM _MoveToCoprocessor builtins put the input register value as9096// the first argument, but the LLVM intrinsic expects it as the third one.9097case clang::ARM::BI_MoveToCoprocessor:9098case clang::ARM::BI_MoveToCoprocessor2: {9099Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor9100? 
Intrinsic::arm_mcr9101: Intrinsic::arm_mcr2);9102return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],9103Ops[3], Ops[4], Ops[5]});9104}9105}91069107// Get the last argument, which specifies the vector type.9108assert(HasExtraArg);9109const Expr *Arg = E->getArg(E->getNumArgs()-1);9110std::optional<llvm::APSInt> Result =9111Arg->getIntegerConstantExpr(getContext());9112if (!Result)9113return nullptr;91149115if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f ||9116BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) {9117// Determine the overloaded type of this builtin.9118llvm::Type *Ty;9119if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f)9120Ty = FloatTy;9121else9122Ty = DoubleTy;91239124// Determine whether this is an unsigned conversion or not.9125bool usgn = Result->getZExtValue() == 1;9126unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;91279128// Call the appropriate intrinsic.9129Function *F = CGM.getIntrinsic(Int, Ty);9130return Builder.CreateCall(F, Ops, "vcvtr");9131}91329133// Determine the type of this overloaded NEON intrinsic.9134NeonTypeFlags Type = Result->getZExtValue();9135bool usgn = Type.isUnsigned();9136bool rightShift = false;91379138llvm::FixedVectorType *VTy =9139GetNeonType(this, Type, getTarget().hasLegalHalfType(), false,9140getTarget().hasBFloat16Type());9141llvm::Type *Ty = VTy;9142if (!Ty)9143return nullptr;91449145// Many NEON builtins have identical semantics and uses in ARM and9146// AArch64. 
Emit these in a single function.9147auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap);9148const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(9149IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);9150if (Builtin)9151return EmitCommonNeonBuiltinExpr(9152Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,9153Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);91549155unsigned Int;9156switch (BuiltinID) {9157default: return nullptr;9158case NEON::BI__builtin_neon_vld1q_lane_v:9159// Handle 64-bit integer elements as a special case. Use shuffles of9160// one-element vectors to avoid poor code for i64 in the backend.9161if (VTy->getElementType()->isIntegerTy(64)) {9162// Extract the other lane.9163Ops[1] = Builder.CreateBitCast(Ops[1], Ty);9164int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();9165Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));9166Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);9167// Load the value as a one-element vector.9168Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);9169llvm::Type *Tys[] = {Ty, Int8PtrTy};9170Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);9171Value *Align = getAlignmentValue32(PtrOp0);9172Value *Ld = Builder.CreateCall(F, {Ops[0], Align});9173// Combine them.9174int Indices[] = {1 - Lane, Lane};9175return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane");9176}9177[[fallthrough]];9178case NEON::BI__builtin_neon_vld1_lane_v: {9179Ops[1] = Builder.CreateBitCast(Ops[1], Ty);9180PtrOp0 = PtrOp0.withElementType(VTy->getElementType());9181Value *Ld = Builder.CreateLoad(PtrOp0);9182return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");9183}9184case NEON::BI__builtin_neon_vqrshrn_n_v:9185Int =9186usgn ? 
Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;9187return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",91881, true);9189case NEON::BI__builtin_neon_vqrshrun_n_v:9190return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),9191Ops, "vqrshrun_n", 1, true);9192case NEON::BI__builtin_neon_vqshrn_n_v:9193Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;9194return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",91951, true);9196case NEON::BI__builtin_neon_vqshrun_n_v:9197return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),9198Ops, "vqshrun_n", 1, true);9199case NEON::BI__builtin_neon_vrecpe_v:9200case NEON::BI__builtin_neon_vrecpeq_v:9201return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),9202Ops, "vrecpe");9203case NEON::BI__builtin_neon_vrshrn_n_v:9204return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),9205Ops, "vrshrn_n", 1, true);9206case NEON::BI__builtin_neon_vrsra_n_v:9207case NEON::BI__builtin_neon_vrsraq_n_v:9208Ops[0] = Builder.CreateBitCast(Ops[0], Ty);9209Ops[1] = Builder.CreateBitCast(Ops[1], Ty);9210Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);9211Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;9212Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});9213return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");9214case NEON::BI__builtin_neon_vsri_n_v:9215case NEON::BI__builtin_neon_vsriq_n_v:9216rightShift = true;9217[[fallthrough]];9218case NEON::BI__builtin_neon_vsli_n_v:9219case NEON::BI__builtin_neon_vsliq_n_v:9220Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);9221return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),9222Ops, "vsli_n");9223case NEON::BI__builtin_neon_vsra_n_v:9224case NEON::BI__builtin_neon_vsraq_n_v:9225Ops[0] = Builder.CreateBitCast(Ops[0], Ty);9226Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");9227return Builder.CreateAdd(Ops[0], Ops[1]);9228case NEON::BI__builtin_neon_vst1q_lane_v:9229// Handle 64-bit integer elements as a special case. Use a shuffle to get9230// a one-element vector and avoid poor code for i64 in the backend.9231if (VTy->getElementType()->isIntegerTy(64)) {9232Ops[1] = Builder.CreateBitCast(Ops[1], Ty);9233Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));9234Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);9235Ops[2] = getAlignmentValue32(PtrOp0);9236llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};9237return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,9238Tys), Ops);9239}9240[[fallthrough]];9241case NEON::BI__builtin_neon_vst1_lane_v: {9242Ops[1] = Builder.CreateBitCast(Ops[1], Ty);9243Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);9244return Builder.CreateStore(Ops[1],9245PtrOp0.withElementType(Ops[1]->getType()));9246}9247case NEON::BI__builtin_neon_vtbl1_v:9248return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),9249Ops, "vtbl1");9250case NEON::BI__builtin_neon_vtbl2_v:9251return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),9252Ops, "vtbl2");9253case NEON::BI__builtin_neon_vtbl3_v:9254return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),9255Ops, "vtbl3");9256case NEON::BI__builtin_neon_vtbl4_v:9257return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),9258Ops, "vtbl4");9259case NEON::BI__builtin_neon_vtbx1_v:9260return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),9261Ops, "vtbx1");9262case NEON::BI__builtin_neon_vtbx2_v:9263return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),9264Ops, "vtbx2");9265case NEON::BI__builtin_neon_vtbx3_v:9266return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),9267Ops, "vtbx3");9268case NEON::BI__builtin_neon_vtbx4_v:9269return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),9270Ops, "vtbx4");9271}9272}92739274template<typename Integer>9275static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {9276return E->getIntegerConstantExpr(Context)->getExtValue();9277}92789279static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,9280llvm::Type *T, bool Unsigned) {9281// Helper function called by Tablegen-constructed ARM MVE builtin codegen,9282// which finds it convenient to specify signed/unsigned as a boolean flag.9283return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T);9284}92859286static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V,9287uint32_t Shift, bool Unsigned) {9288// MVE helper function for integer shift right. This must handle signed vs9289// unsigned, and also deal specially with the case where the shift count is9290// equal to the lane size. In LLVM IR, an LShr with that parameter would be9291// undefined behavior, but in MVE it's legal, so we must convert it to code9292// that is not undefined in IR.9293unsigned LaneBits = cast<llvm::VectorType>(V->getType())9294->getElementType()9295->getPrimitiveSizeInBits();9296if (Shift == LaneBits) {9297// An unsigned shift of the full lane size always generates zero, so we can9298// simply emit a zero vector. 
A signed shift of the full lane size does the9299// same thing as shifting by one bit fewer.9300if (Unsigned)9301return llvm::Constant::getNullValue(V->getType());9302else9303--Shift;9304}9305return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift);9306}93079308static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) {9309// MVE-specific helper function for a vector splat, which infers the element9310// count of the output vector by knowing that MVE vectors are all 128 bits9311// wide.9312unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits();9313return Builder.CreateVectorSplat(Elements, V);9314}93159316static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder,9317CodeGenFunction *CGF,9318llvm::Value *V,9319llvm::Type *DestType) {9320// Convert one MVE vector type into another by reinterpreting its in-register9321// format.9322//9323// Little-endian, this is identical to a bitcast (which reinterprets the9324// memory format). But big-endian, they're not necessarily the same, because9325// the register and memory formats map to each other differently depending on9326// the lane size.9327//9328// We generate a bitcast whenever we can (if we're little-endian, or if the9329// lane sizes are the same anyway). 
Otherwise we fall back to an IR intrinsic9330// that performs the different kind of reinterpretation.9331if (CGF->getTarget().isBigEndian() &&9332V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) {9333return Builder.CreateCall(9334CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq,9335{DestType, V->getType()}),9336V);9337} else {9338return Builder.CreateBitCast(V, DestType);9339}9340}93419342static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) {9343// Make a shufflevector that extracts every other element of a vector (evens9344// or odds, as desired).9345SmallVector<int, 16> Indices;9346unsigned InputElements =9347cast<llvm::FixedVectorType>(V->getType())->getNumElements();9348for (unsigned i = 0; i < InputElements; i += 2)9349Indices.push_back(i + Odd);9350return Builder.CreateShuffleVector(V, Indices);9351}93529353static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,9354llvm::Value *V1) {9355// Make a shufflevector that interleaves two vectors element by element.9356assert(V0->getType() == V1->getType() && "Can't zip different vector types");9357SmallVector<int, 16> Indices;9358unsigned InputElements =9359cast<llvm::FixedVectorType>(V0->getType())->getNumElements();9360for (unsigned i = 0; i < InputElements; i++) {9361Indices.push_back(i);9362Indices.push_back(i + InputElements);9363}9364return Builder.CreateShuffleVector(V0, V1, Indices);9365}93669367template<unsigned HighBit, unsigned OtherBits>9368static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {9369// MVE-specific helper function to make a vector splat of a constant such as9370// UINT_MAX or INT_MIN, in which all bits below the highest one are equal.9371llvm::Type *T = cast<llvm::VectorType>(VT)->getElementType();9372unsigned LaneBits = T->getPrimitiveSizeInBits();9373uint32_t Value = HighBit << (LaneBits - 1);9374if (OtherBits)9375Value |= (1UL << (LaneBits - 1)) - 1;9376llvm::Value *Lane = 
llvm::ConstantInt::get(T, Value);9377return ARMMVEVectorSplat(Builder, Lane);9378}93799380static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,9381llvm::Value *V,9382unsigned ReverseWidth) {9383// MVE-specific helper function which reverses the elements of a9384// vector within every (ReverseWidth)-bit collection of lanes.9385SmallVector<int, 16> Indices;9386unsigned LaneSize = V->getType()->getScalarSizeInBits();9387unsigned Elements = 128 / LaneSize;9388unsigned Mask = ReverseWidth / LaneSize - 1;9389for (unsigned i = 0; i < Elements; i++)9390Indices.push_back(i ^ Mask);9391return Builder.CreateShuffleVector(V, Indices);9392}93939394Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,9395const CallExpr *E,9396ReturnValueSlot ReturnValue,9397llvm::Triple::ArchType Arch) {9398enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;9399Intrinsic::ID IRIntr;9400unsigned NumVectors;94019402// Code autogenerated by Tablegen will handle all the simple builtins.9403switch (BuiltinID) {9404#include "clang/Basic/arm_mve_builtin_cg.inc"94059406// If we didn't match an MVE builtin id at all, go back to the9407// main EmitARMBuiltinExpr.9408default:9409return nullptr;9410}94119412// Anything that breaks from that switch is an MVE builtin that9413// needs handwritten code to generate.94149415switch (CustomCodeGenType) {94169417case CustomCodeGen::VLD24: {9418llvm::SmallVector<Value *, 4> Ops;9419llvm::SmallVector<llvm::Type *, 4> Tys;94209421auto MvecCType = E->getType();9422auto MvecLType = ConvertType(MvecCType);9423assert(MvecLType->isStructTy() &&9424"Return type for vld[24]q should be a struct");9425assert(MvecLType->getStructNumElements() == 1 &&9426"Return-type struct for vld[24]q should have one element");9427auto MvecLTypeInner = MvecLType->getStructElementType(0);9428assert(MvecLTypeInner->isArrayTy() &&9429"Return-type struct for vld[24]q should contain an array");9430assert(MvecLTypeInner->getArrayNumElements() == NumVectors 
&&9431"Array member of return-type struct vld[24]q has wrong length");9432auto VecLType = MvecLTypeInner->getArrayElementType();94339434Tys.push_back(VecLType);94359436auto Addr = E->getArg(0);9437Ops.push_back(EmitScalarExpr(Addr));9438Tys.push_back(ConvertType(Addr->getType()));94399440Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));9441Value *LoadResult = Builder.CreateCall(F, Ops);9442Value *MvecOut = PoisonValue::get(MvecLType);9443for (unsigned i = 0; i < NumVectors; ++i) {9444Value *Vec = Builder.CreateExtractValue(LoadResult, i);9445MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});9446}94479448if (ReturnValue.isNull())9449return MvecOut;9450else9451return Builder.CreateStore(MvecOut, ReturnValue.getAddress());9452}94539454case CustomCodeGen::VST24: {9455llvm::SmallVector<Value *, 4> Ops;9456llvm::SmallVector<llvm::Type *, 4> Tys;94579458auto Addr = E->getArg(0);9459Ops.push_back(EmitScalarExpr(Addr));9460Tys.push_back(ConvertType(Addr->getType()));94619462auto MvecCType = E->getArg(1)->getType();9463auto MvecLType = ConvertType(MvecCType);9464assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");9465assert(MvecLType->getStructNumElements() == 1 &&9466"Data-type struct for vst2q should have one element");9467auto MvecLTypeInner = MvecLType->getStructElementType(0);9468assert(MvecLTypeInner->isArrayTy() &&9469"Data-type struct for vst2q should contain an array");9470assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&9471"Array member of return-type struct vld[24]q has wrong length");9472auto VecLType = MvecLTypeInner->getArrayElementType();94739474Tys.push_back(VecLType);94759476AggValueSlot MvecSlot = CreateAggTemp(MvecCType);9477EmitAggExpr(E->getArg(1), MvecSlot);9478auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());9479for (unsigned i = 0; i < NumVectors; i++)9480Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));94819482Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys));9483Value *ToReturn = 
nullptr;9484for (unsigned i = 0; i < NumVectors; i++) {9485Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));9486ToReturn = Builder.CreateCall(F, Ops);9487Ops.pop_back();9488}9489return ToReturn;9490}9491}9492llvm_unreachable("unknown custom codegen type.");9493}94949495Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned BuiltinID,9496const CallExpr *E,9497ReturnValueSlot ReturnValue,9498llvm::Triple::ArchType Arch) {9499switch (BuiltinID) {9500default:9501return nullptr;9502#include "clang/Basic/arm_cde_builtin_cg.inc"9503}9504}95059506static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,9507const CallExpr *E,9508SmallVectorImpl<Value *> &Ops,9509llvm::Triple::ArchType Arch) {9510unsigned int Int = 0;9511const char *s = nullptr;95129513switch (BuiltinID) {9514default:9515return nullptr;9516case NEON::BI__builtin_neon_vtbl1_v:9517case NEON::BI__builtin_neon_vqtbl1_v:9518case NEON::BI__builtin_neon_vqtbl1q_v:9519case NEON::BI__builtin_neon_vtbl2_v:9520case NEON::BI__builtin_neon_vqtbl2_v:9521case NEON::BI__builtin_neon_vqtbl2q_v:9522case NEON::BI__builtin_neon_vtbl3_v:9523case NEON::BI__builtin_neon_vqtbl3_v:9524case NEON::BI__builtin_neon_vqtbl3q_v:9525case NEON::BI__builtin_neon_vtbl4_v:9526case NEON::BI__builtin_neon_vqtbl4_v:9527case NEON::BI__builtin_neon_vqtbl4q_v:9528break;9529case NEON::BI__builtin_neon_vtbx1_v:9530case NEON::BI__builtin_neon_vqtbx1_v:9531case NEON::BI__builtin_neon_vqtbx1q_v:9532case NEON::BI__builtin_neon_vtbx2_v:9533case NEON::BI__builtin_neon_vqtbx2_v:9534case NEON::BI__builtin_neon_vqtbx2q_v:9535case NEON::BI__builtin_neon_vtbx3_v:9536case NEON::BI__builtin_neon_vqtbx3_v:9537case NEON::BI__builtin_neon_vqtbx3q_v:9538case NEON::BI__builtin_neon_vtbx4_v:9539case NEON::BI__builtin_neon_vqtbx4_v:9540case NEON::BI__builtin_neon_vqtbx4q_v:9541break;9542}95439544assert(E->getNumArgs() >= 3);95459546// Get the last argument, which specifies the vector type.9547const Expr *Arg = E->getArg(E->getNumArgs() - 
1);9548std::optional<llvm::APSInt> Result =9549Arg->getIntegerConstantExpr(CGF.getContext());9550if (!Result)9551return nullptr;95529553// Determine the type of this overloaded NEON intrinsic.9554NeonTypeFlags Type = Result->getZExtValue();9555llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type);9556if (!Ty)9557return nullptr;95589559CodeGen::CGBuilderTy &Builder = CGF.Builder;95609561// AArch64 scalar builtins are not overloaded, they do not have an extra9562// argument that specifies the vector type, need to handle each case.9563switch (BuiltinID) {9564case NEON::BI__builtin_neon_vtbl1_v: {9565return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1],9566Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");9567}9568case NEON::BI__builtin_neon_vtbl2_v: {9569return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2],9570Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");9571}9572case NEON::BI__builtin_neon_vtbl3_v: {9573return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3],9574Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");9575}9576case NEON::BI__builtin_neon_vtbl4_v: {9577return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4],9578Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");9579}9580case NEON::BI__builtin_neon_vtbx1_v: {9581Value *TblRes =9582packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty,9583Intrinsic::aarch64_neon_tbl1, "vtbl1");95849585llvm::Constant *EightV = ConstantInt::get(Ty, 8);9586Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);9587CmpRes = Builder.CreateSExt(CmpRes, Ty);95889589Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);9590Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);9591return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");9592}9593case NEON::BI__builtin_neon_vtbx2_v: {9594return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3],9595Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1");9596}9597case 
NEON::BI__builtin_neon_vtbx3_v: {9598Value *TblRes =9599packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty,9600Intrinsic::aarch64_neon_tbl2, "vtbl2");96019602llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);9603Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],9604TwentyFourV);9605CmpRes = Builder.CreateSExt(CmpRes, Ty);96069607Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);9608Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);9609return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");9610}9611case NEON::BI__builtin_neon_vtbx4_v: {9612return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5],9613Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2");9614}9615case NEON::BI__builtin_neon_vqtbl1_v:9616case NEON::BI__builtin_neon_vqtbl1q_v:9617Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;9618case NEON::BI__builtin_neon_vqtbl2_v:9619case NEON::BI__builtin_neon_vqtbl2q_v: {9620Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;9621case NEON::BI__builtin_neon_vqtbl3_v:9622case NEON::BI__builtin_neon_vqtbl3q_v:9623Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;9624case NEON::BI__builtin_neon_vqtbl4_v:9625case NEON::BI__builtin_neon_vqtbl4q_v:9626Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;9627case NEON::BI__builtin_neon_vqtbx1_v:9628case NEON::BI__builtin_neon_vqtbx1q_v:9629Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;9630case NEON::BI__builtin_neon_vqtbx2_v:9631case NEON::BI__builtin_neon_vqtbx2q_v:9632Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;9633case NEON::BI__builtin_neon_vqtbx3_v:9634case NEON::BI__builtin_neon_vqtbx3q_v:9635Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;9636case NEON::BI__builtin_neon_vqtbx4_v:9637case NEON::BI__builtin_neon_vqtbx4q_v:9638Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;9639}9640}96419642if (!Int)9643return nullptr;96449645Function *F = CGF.CGM.getIntrinsic(Int, Ty);9646return 
CGF.EmitNeonCall(F, Ops, s);9647}96489649Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {9650auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);9651Op = Builder.CreateBitCast(Op, Int16Ty);9652Value *V = PoisonValue::get(VTy);9653llvm::Constant *CI = ConstantInt::get(SizeTy, 0);9654Op = Builder.CreateInsertElement(V, Op, CI);9655return Op;9656}96579658/// SVEBuiltinMemEltTy - Returns the memory element type for this memory9659/// access builtin. Only required if it can't be inferred from the base pointer9660/// operand.9661llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) {9662switch (TypeFlags.getMemEltType()) {9663case SVETypeFlags::MemEltTyDefault:9664return getEltType(TypeFlags);9665case SVETypeFlags::MemEltTyInt8:9666return Builder.getInt8Ty();9667case SVETypeFlags::MemEltTyInt16:9668return Builder.getInt16Ty();9669case SVETypeFlags::MemEltTyInt32:9670return Builder.getInt32Ty();9671case SVETypeFlags::MemEltTyInt64:9672return Builder.getInt64Ty();9673}9674llvm_unreachable("Unknown MemEltType");9675}96769677llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {9678switch (TypeFlags.getEltType()) {9679default:9680llvm_unreachable("Invalid SVETypeFlag!");96819682case SVETypeFlags::EltTyInt8:9683return Builder.getInt8Ty();9684case SVETypeFlags::EltTyInt16:9685return Builder.getInt16Ty();9686case SVETypeFlags::EltTyInt32:9687return Builder.getInt32Ty();9688case SVETypeFlags::EltTyInt64:9689return Builder.getInt64Ty();9690case SVETypeFlags::EltTyInt128:9691return Builder.getInt128Ty();96929693case SVETypeFlags::EltTyFloat16:9694return Builder.getHalfTy();9695case SVETypeFlags::EltTyFloat32:9696return Builder.getFloatTy();9697case SVETypeFlags::EltTyFloat64:9698return Builder.getDoubleTy();96999700case SVETypeFlags::EltTyBFloat16:9701return Builder.getBFloatTy();97029703case SVETypeFlags::EltTyBool8:9704case SVETypeFlags::EltTyBool16:9705case SVETypeFlags::EltTyBool32:9706case SVETypeFlags::EltTyBool64:9707return 
Builder.getInt1Ty();9708}9709}97109711// Return the llvm predicate vector type corresponding to the specified element9712// TypeFlags.9713llvm::ScalableVectorType *9714CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) {9715switch (TypeFlags.getEltType()) {9716default: llvm_unreachable("Unhandled SVETypeFlag!");97179718case SVETypeFlags::EltTyInt8:9719return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);9720case SVETypeFlags::EltTyInt16:9721return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);9722case SVETypeFlags::EltTyInt32:9723return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);9724case SVETypeFlags::EltTyInt64:9725return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);97269727case SVETypeFlags::EltTyBFloat16:9728return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);9729case SVETypeFlags::EltTyFloat16:9730return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);9731case SVETypeFlags::EltTyFloat32:9732return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);9733case SVETypeFlags::EltTyFloat64:9734return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);97359736case SVETypeFlags::EltTyBool8:9737return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);9738case SVETypeFlags::EltTyBool16:9739return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);9740case SVETypeFlags::EltTyBool32:9741return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);9742case SVETypeFlags::EltTyBool64:9743return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);9744}9745}97469747// Return the llvm vector type corresponding to the specified element TypeFlags.9748llvm::ScalableVectorType *9749CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) {9750switch (TypeFlags.getEltType()) {9751default:9752llvm_unreachable("Invalid SVETypeFlag!");97539754case SVETypeFlags::EltTyInt8:9755return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);9756case SVETypeFlags::EltTyInt16:9757return 
llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8);9758case SVETypeFlags::EltTyInt32:9759return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4);9760case SVETypeFlags::EltTyInt64:9761return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);97629763case SVETypeFlags::EltTyFloat16:9764return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);9765case SVETypeFlags::EltTyBFloat16:9766return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8);9767case SVETypeFlags::EltTyFloat32:9768return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4);9769case SVETypeFlags::EltTyFloat64:9770return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2);97719772case SVETypeFlags::EltTyBool8:9773return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16);9774case SVETypeFlags::EltTyBool16:9775return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);9776case SVETypeFlags::EltTyBool32:9777return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);9778case SVETypeFlags::EltTyBool64:9779return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);9780}9781}97829783llvm::Value *9784CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) {9785Function *Ptrue =9786CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags));9787return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)});9788}97899790constexpr unsigned SVEBitsPerBlock = 128;97919792static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) {9793unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits();9794return llvm::ScalableVectorType::get(EltTy, NumElts);9795}97969797// Reinterpret the input predicate so that it can be used to correctly isolate9798// the elements of the specified datatype.9799Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred,9800llvm::ScalableVectorType *VTy) {98019802if (isa<TargetExtType>(Pred->getType()) &&9803cast<TargetExtType>(Pred->getType())->getName() == "aarch64.svcount")9804return 
Pred;98059806auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy);9807if (Pred->getType() == RTy)9808return Pred;98099810unsigned IntID;9811llvm::Type *IntrinsicTy;9812switch (VTy->getMinNumElements()) {9813default:9814llvm_unreachable("unsupported element count!");9815case 1:9816case 2:9817case 4:9818case 8:9819IntID = Intrinsic::aarch64_sve_convert_from_svbool;9820IntrinsicTy = RTy;9821break;9822case 16:9823IntID = Intrinsic::aarch64_sve_convert_to_svbool;9824IntrinsicTy = Pred->getType();9825break;9826}98279828Function *F = CGM.getIntrinsic(IntID, IntrinsicTy);9829Value *C = Builder.CreateCall(F, Pred);9830assert(C->getType() == RTy && "Unexpected return type!");9831return C;9832}98339834Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags,9835SmallVectorImpl<Value *> &Ops,9836unsigned IntID) {9837auto *ResultTy = getSVEType(TypeFlags);9838auto *OverloadedTy =9839llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy);98409841Function *F = nullptr;9842if (Ops[1]->getType()->isVectorTy())9843// This is the "vector base, scalar offset" case. In order to uniquely9844// map this built-in to an LLVM IR intrinsic, we need both the return type9845// and the type of the vector base.9846F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()});9847else9848// This is the "scalar base, vector offset case". The type of the offset9849// is encoded in the name of the intrinsic. We only need to specify the9850// return type in order to uniquely map this built-in to an LLVM IR9851// intrinsic.9852F = CGM.getIntrinsic(IntID, OverloadedTy);98539854// At the ACLE level there's only one predicate type, svbool_t, which is9855// mapped to <n x 16 x i1>. However, this might be incompatible with the9856// actual type being loaded. For example, when loading doubles (i64) the9857// predicate should be <n x 2 x i1> instead. At the IR level the type of9858// the predicate and the data being loaded must match. 
Cast to the type9859// expected by the intrinsic. The intrinsic itself should be defined in9860// a way than enforces relations between parameter types.9861Ops[0] = EmitSVEPredicateCast(9862Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));98639864// Pass 0 when the offset is missing. This can only be applied when using9865// the "vector base" addressing mode for which ACLE allows no offset. The9866// corresponding LLVM IR always requires an offset.9867if (Ops.size() == 2) {9868assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");9869Ops.push_back(ConstantInt::get(Int64Ty, 0));9870}98719872// For "vector base, scalar index" scale the index so that it becomes a9873// scalar offset.9874if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {9875unsigned BytesPerElt =9876OverloadedTy->getElementType()->getScalarSizeInBits() / 8;9877Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));9878}98799880Value *Call = Builder.CreateCall(F, Ops);98819882// The following sext/zext is only needed when ResultTy != OverloadedTy. In9883// other cases it's folded into a nop.9884return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)9885: Builder.CreateSExt(Call, ResultTy);9886}98879888Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,9889SmallVectorImpl<Value *> &Ops,9890unsigned IntID) {9891auto *SrcDataTy = getSVEType(TypeFlags);9892auto *OverloadedTy =9893llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);98949895// In ACLE the source data is passed in the last argument, whereas in LLVM IR9896// it's the first argument. Move it accordingly.9897Ops.insert(Ops.begin(), Ops.pop_back_val());98989899Function *F = nullptr;9900if (Ops[2]->getType()->isVectorTy())9901// This is the "vector base, scalar offset" case. 
In order to uniquely9902// map this built-in to an LLVM IR intrinsic, we need both the return type9903// and the type of the vector base.9904F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});9905else9906// This is the "scalar base, vector offset case". The type of the offset9907// is encoded in the name of the intrinsic. We only need to specify the9908// return type in order to uniquely map this built-in to an LLVM IR9909// intrinsic.9910F = CGM.getIntrinsic(IntID, OverloadedTy);99119912// Pass 0 when the offset is missing. This can only be applied when using9913// the "vector base" addressing mode for which ACLE allows no offset. The9914// corresponding LLVM IR always requires an offset.9915if (Ops.size() == 3) {9916assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");9917Ops.push_back(ConstantInt::get(Int64Ty, 0));9918}99199920// Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's9921// folded into a nop.9922Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);99239924// At the ACLE level there's only one predicate type, svbool_t, which is9925// mapped to <n x 16 x i1>. However, this might be incompatible with the9926// actual type being stored. For example, when storing doubles (i64) the9927// predicated should be <n x 2 x i1> instead. At the IR level the type of9928// the predicate and the data being stored must match. Cast to the type9929// expected by the intrinsic. 
The intrinsic itself should be defined in9930// a way that enforces relations between parameter types.9931Ops[1] = EmitSVEPredicateCast(9932Ops[1], cast<llvm::ScalableVectorType>(F->getArg(1)->getType()));99339934// For "vector base, scalar index" scale the index so that it becomes a9935// scalar offset.9936if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) {9937unsigned BytesPerElt =9938OverloadedTy->getElementType()->getScalarSizeInBits() / 8;9939Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt));9940}99419942return Builder.CreateCall(F, Ops);9943}99449945Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags,9946SmallVectorImpl<Value *> &Ops,9947unsigned IntID) {9948// The gather prefetches are overloaded on the vector input - this can either9949// be the vector of base addresses or vector of offsets.9950auto *OverloadedTy = dyn_cast<llvm::ScalableVectorType>(Ops[1]->getType());9951if (!OverloadedTy)9952OverloadedTy = cast<llvm::ScalableVectorType>(Ops[2]->getType());99539954// Cast the predicate from svbool_t to the right number of elements.9955Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy);99569957// vector + imm addressing modes9958if (Ops[1]->getType()->isVectorTy()) {9959if (Ops.size() == 3) {9960// Pass 0 for 'vector+imm' when the index is omitted.9961Ops.push_back(ConstantInt::get(Int64Ty, 0));99629963// The sv_prfop is the last operand in the builtin and IR intrinsic.9964std::swap(Ops[2], Ops[3]);9965} else {9966// Index needs to be passed as scaled offset.9967llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);9968unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8;9969if (BytesPerElt > 1)9970Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));9971}9972}99739974Function *F = CGM.getIntrinsic(IntID, OverloadedTy);9975return Builder.CreateCall(F, Ops);9976}99779978Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags,9979SmallVectorImpl<Value*> &Ops,9980unsigned IntID) 
{9981llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);99829983unsigned N;9984switch (IntID) {9985case Intrinsic::aarch64_sve_ld2_sret:9986case Intrinsic::aarch64_sve_ld1_pn_x2:9987case Intrinsic::aarch64_sve_ldnt1_pn_x2:9988case Intrinsic::aarch64_sve_ld2q_sret:9989N = 2;9990break;9991case Intrinsic::aarch64_sve_ld3_sret:9992case Intrinsic::aarch64_sve_ld3q_sret:9993N = 3;9994break;9995case Intrinsic::aarch64_sve_ld4_sret:9996case Intrinsic::aarch64_sve_ld1_pn_x4:9997case Intrinsic::aarch64_sve_ldnt1_pn_x4:9998case Intrinsic::aarch64_sve_ld4q_sret:9999N = 4;10000break;10001default:10002llvm_unreachable("unknown intrinsic!");10003}10004auto RetTy = llvm::VectorType::get(VTy->getElementType(),10005VTy->getElementCount() * N);1000610007Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);10008Value *BasePtr = Ops[1];1000910010// Does the load have an offset?10011if (Ops.size() > 2)10012BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);1001310014Function *F = CGM.getIntrinsic(IntID, {VTy});10015Value *Call = Builder.CreateCall(F, {Predicate, BasePtr});10016unsigned MinElts = VTy->getMinNumElements();10017Value *Ret = llvm::PoisonValue::get(RetTy);10018for (unsigned I = 0; I < N; I++) {10019Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);10020Value *SRet = Builder.CreateExtractValue(Call, I);10021Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);10022}10023return Ret;10024}1002510026Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags,10027SmallVectorImpl<Value*> &Ops,10028unsigned IntID) {10029llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);1003010031unsigned N;10032switch (IntID) {10033case Intrinsic::aarch64_sve_st2:10034case Intrinsic::aarch64_sve_st1_pn_x2:10035case Intrinsic::aarch64_sve_stnt1_pn_x2:10036case Intrinsic::aarch64_sve_st2q:10037N = 2;10038break;10039case Intrinsic::aarch64_sve_st3:10040case Intrinsic::aarch64_sve_st3q:10041N = 3;10042break;10043case Intrinsic::aarch64_sve_st4:10044case 
Intrinsic::aarch64_sve_st1_pn_x4:10045case Intrinsic::aarch64_sve_stnt1_pn_x4:10046case Intrinsic::aarch64_sve_st4q:10047N = 4;10048break;10049default:10050llvm_unreachable("unknown intrinsic!");10051}1005210053Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);10054Value *BasePtr = Ops[1];1005510056// Does the store have an offset?10057if (Ops.size() > (2 + N))10058BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]);1005910060// The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we10061// need to break up the tuple vector.10062SmallVector<llvm::Value*, 5> Operands;10063for (unsigned I = Ops.size() - N; I < Ops.size(); ++I)10064Operands.push_back(Ops[I]);10065Operands.append({Predicate, BasePtr});10066Function *F = CGM.getIntrinsic(IntID, { VTy });1006710068return Builder.CreateCall(F, Operands);10069}1007010071// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and10072// svpmullt_pair intrinsics, with the exception that their results are bitcast10073// to a wider type.10074Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags,10075SmallVectorImpl<Value *> &Ops,10076unsigned BuiltinID) {10077// Splat scalar operand to vector (intrinsics with _n infix)10078if (TypeFlags.hasSplatOperand()) {10079unsigned OpNo = TypeFlags.getSplatOperand();10080Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);10081}1008210083// The pair-wise function has a narrower overloaded type.10084Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType());10085Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]});1008610087// Now bitcast to the wider result type.10088llvm::ScalableVectorType *Ty = getSVEType(TypeFlags);10089return EmitSVEReinterpret(Call, Ty);10090}1009110092Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags,10093ArrayRef<Value *> Ops, unsigned BuiltinID) {10094llvm::Type *OverloadedTy = getSVEType(TypeFlags);10095Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy);10096return Builder.CreateCall(F, {Ops[0], 
Builder.getInt32(0)});10097}1009810099Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags,10100SmallVectorImpl<Value *> &Ops,10101unsigned BuiltinID) {10102auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags);10103auto *VectorTy = getSVEVectorForElementType(MemEltTy);10104auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);1010510106Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy);10107Value *BasePtr = Ops[1];1010810109// Implement the index operand if not omitted.10110if (Ops.size() > 3)10111BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);1011210113Value *PrfOp = Ops.back();1011410115Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType());10116return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp});10117}1011810119Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,10120llvm::Type *ReturnTy,10121SmallVectorImpl<Value *> &Ops,10122unsigned IntrinsicID,10123bool IsZExtReturn) {10124QualType LangPTy = E->getArg(1)->getType();10125llvm::Type *MemEltTy = CGM.getTypes().ConvertType(10126LangPTy->castAs<PointerType>()->getPointeeType());1012710128// The vector type that is returned may be different from the10129// eventual type loaded from memory.10130auto VectorTy = cast<llvm::ScalableVectorType>(ReturnTy);10131llvm::ScalableVectorType *MemoryTy = nullptr;10132llvm::ScalableVectorType *PredTy = nullptr;10133bool IsQuadLoad = false;10134switch (IntrinsicID) {10135case Intrinsic::aarch64_sve_ld1uwq:10136case Intrinsic::aarch64_sve_ld1udq:10137MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);10138PredTy = llvm::ScalableVectorType::get(10139llvm::Type::getInt1Ty(getLLVMContext()), 1);10140IsQuadLoad = true;10141break;10142default:10143MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);10144PredTy = MemoryTy;10145break;10146}1014710148Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);10149Value *BasePtr = Ops[1];1015010151// Does the load have an offset?10152if (Ops.size() > 2)10153BasePtr 
= Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]);1015410155Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy);10156auto *Load =10157cast<llvm::Instruction>(Builder.CreateCall(F, {Predicate, BasePtr}));10158auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());10159CGM.DecorateInstructionWithTBAA(Load, TBAAInfo);1016010161if (IsQuadLoad)10162return Load;1016310164return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy)10165: Builder.CreateSExt(Load, VectorTy);10166}1016710168Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,10169SmallVectorImpl<Value *> &Ops,10170unsigned IntrinsicID) {10171QualType LangPTy = E->getArg(1)->getType();10172llvm::Type *MemEltTy = CGM.getTypes().ConvertType(10173LangPTy->castAs<PointerType>()->getPointeeType());1017410175// The vector type that is stored may be different from the10176// eventual type stored to memory.10177auto VectorTy = cast<llvm::ScalableVectorType>(Ops.back()->getType());10178auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy);1017910180auto PredTy = MemoryTy;10181auto AddrMemoryTy = MemoryTy;10182bool IsQuadStore = false;1018310184switch (IntrinsicID) {10185case Intrinsic::aarch64_sve_st1wq:10186case Intrinsic::aarch64_sve_st1dq:10187AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1);10188PredTy =10189llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1);10190IsQuadStore = true;10191break;10192default:10193break;10194}10195Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy);10196Value *BasePtr = Ops[1];1019710198// Does the store have an offset?10199if (Ops.size() == 4)10200BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]);1020110202// Last value is always the data10203Value *Val =10204IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy);1020510206Function *F =10207CGM.getIntrinsic(IntrinsicID, IsQuadStore ? 
VectorTy : MemoryTy);10208auto *Store =10209cast<llvm::Instruction>(Builder.CreateCall(F, {Val, Predicate, BasePtr}));10210auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType());10211CGM.DecorateInstructionWithTBAA(Store, TBAAInfo);10212return Store;10213}1021410215Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags,10216SmallVectorImpl<Value *> &Ops,10217unsigned IntID) {10218Ops[2] = EmitSVEPredicateCast(10219Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)));1022010221SmallVector<Value *> NewOps;10222NewOps.push_back(Ops[2]);1022310224llvm::Value *BasePtr = Ops[3];1022510226// If the intrinsic contains the vnum parameter, multiply it with the vector10227// size in bytes.10228if (Ops.size() == 5) {10229Function *StreamingVectorLength =10230CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb);10231llvm::Value *StreamingVectorLengthCall =10232Builder.CreateCall(StreamingVectorLength);10233llvm::Value *Mulvl =10234Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl");10235// The type of the ptr parameter is void *, so use Int8Ty here.10236BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl);10237}10238NewOps.push_back(BasePtr);10239NewOps.push_back(Ops[0]);10240NewOps.push_back(Ops[1]);10241Function *F = CGM.getIntrinsic(IntID);10242return Builder.CreateCall(F, NewOps);10243}1024410245Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags,10246SmallVectorImpl<Value *> &Ops,10247unsigned IntID) {10248auto *VecTy = getSVEType(TypeFlags);10249Function *F = CGM.getIntrinsic(IntID, VecTy);10250if (TypeFlags.isReadZA())10251Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy);10252else if (TypeFlags.isWriteZA())10253Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy);10254return Builder.CreateCall(F, Ops);10255}1025610257Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags,10258SmallVectorImpl<Value *> &Ops,10259unsigned IntID) {10260// svzero_za() intrinsic zeros the entire za tile and has no paramters.10261if 
(Ops.size() == 0)10262Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255));10263Function *F = CGM.getIntrinsic(IntID, {});10264return Builder.CreateCall(F, Ops);10265}1026610267Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags,10268SmallVectorImpl<Value *> &Ops,10269unsigned IntID) {10270if (Ops.size() == 2)10271Ops.push_back(Builder.getInt32(0));10272else10273Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true);10274Function *F = CGM.getIntrinsic(IntID, {});10275return Builder.CreateCall(F, Ops);10276}1027710278// Limit the usage of scalable llvm IR generated by the ACLE by using the10279// sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.10280Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) {10281return Builder.CreateVectorSplat(10282cast<llvm::VectorType>(Ty)->getElementCount(), Scalar);10283}1028410285Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) {10286return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType()));10287}1028810289Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) {10290// FIXME: For big endian this needs an additional REV, or needs a separate10291// intrinsic that is code-generated as a no-op, because the LLVM bitcast10292// instruction is defined as 'bitwise' equivalent from memory point of10293// view (when storing/reloading), whereas the svreinterpret builtin10294// implements bitwise equivalent cast from register point of view.10295// LLVM CodeGen for a bitcast must add an explicit REV for big-endian.10296return Builder.CreateBitCast(Val, Ty);10297}1029810299static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty,10300SmallVectorImpl<Value *> &Ops) {10301auto *SplatZero = Constant::getNullValue(Ty);10302Ops.insert(Ops.begin(), SplatZero);10303}1030410305static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty,10306SmallVectorImpl<Value *> &Ops) {10307auto *SplatUndef = UndefValue::get(Ty);10308Ops.insert(Ops.begin(), 
SplatUndef);10309}1031010311SmallVector<llvm::Type *, 2>10312CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags,10313llvm::Type *ResultType,10314ArrayRef<Value *> Ops) {10315if (TypeFlags.isOverloadNone())10316return {};1031710318llvm::Type *DefaultType = getSVEType(TypeFlags);1031910320if (TypeFlags.isOverloadWhileOrMultiVecCvt())10321return {DefaultType, Ops[1]->getType()};1032210323if (TypeFlags.isOverloadWhileRW())10324return {getSVEPredType(TypeFlags), Ops[0]->getType()};1032510326if (TypeFlags.isOverloadCvt())10327return {Ops[0]->getType(), Ops.back()->getType()};1032810329if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() &&10330ResultType->isVectorTy())10331return {ResultType, Ops[1]->getType()};1033210333assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");10334return {DefaultType};10335}1033610337Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags,10338llvm::Type *Ty,10339ArrayRef<Value *> Ops) {10340assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) &&10341"Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()");1034210343unsigned I = cast<ConstantInt>(Ops[1])->getSExtValue();10344auto *SingleVecTy = dyn_cast<llvm::ScalableVectorType>(10345TypeFlags.isTupleSet() ? 
Ops[2]->getType() : Ty);1034610347if (!SingleVecTy)10348return nullptr;1034910350Value *Idx = ConstantInt::get(CGM.Int64Ty,10351I * SingleVecTy->getMinNumElements());1035210353if (TypeFlags.isTupleSet())10354return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx);10355return Builder.CreateExtractVector(Ty, Ops[0], Idx);10356}1035710358Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags,10359llvm::Type *Ty,10360ArrayRef<Value *> Ops) {10361assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate");1036210363auto *SrcTy = dyn_cast<llvm::ScalableVectorType>(Ops[0]->getType());1036410365if (!SrcTy)10366return nullptr;1036710368unsigned MinElts = SrcTy->getMinNumElements();10369Value *Call = llvm::PoisonValue::get(Ty);10370for (unsigned I = 0; I < Ops.size(); I++) {10371Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);10372Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx);10373}1037410375return Call;10376}1037710378Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) {10379// Multi-vector results should be broken up into a single (wide) result10380// vector.10381auto *StructTy = dyn_cast<StructType>(Call->getType());10382if (!StructTy)10383return Call;1038410385auto *VTy = dyn_cast<ScalableVectorType>(StructTy->getTypeAtIndex(0U));10386if (!VTy)10387return Call;10388unsigned N = StructTy->getNumElements();1038910390// We may need to emit a cast to a svbool_t10391bool IsPredTy = VTy->getElementType()->isIntegerTy(1);10392unsigned MinElts = IsPredTy ? 
16 : VTy->getMinNumElements();1039310394ScalableVectorType *WideVTy =10395ScalableVectorType::get(VTy->getElementType(), MinElts * N);10396Value *Ret = llvm::PoisonValue::get(WideVTy);10397for (unsigned I = 0; I < N; ++I) {10398Value *SRet = Builder.CreateExtractValue(Call, I);10399assert(SRet->getType() == VTy && "Unexpected type for result value");10400Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts);1040110402if (IsPredTy)10403SRet = EmitSVEPredicateCast(10404SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16));1040510406Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx);10407}10408Call = Ret;1040910410return Call;10411}1041210413void CodeGenFunction::GetAArch64SVEProcessedOperands(10414unsigned BuiltinID, const CallExpr *E, SmallVectorImpl<Value *> &Ops,10415SVETypeFlags TypeFlags) {10416// Find out if any arguments are required to be integer constant expressions.10417unsigned ICEArguments = 0;10418ASTContext::GetBuiltinTypeError Error;10419getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);10420assert(Error == ASTContext::GE_None && "Should not codegen an error");1042110422// Tuple set/get only requires one insert/extract vector, which is10423// created by EmitSVETupleSetOrGet.10424bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet();1042510426for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {10427bool IsICE = ICEArguments & (1 << i);10428Value *Arg = EmitScalarExpr(E->getArg(i));1042910430if (IsICE) {10431// If this is required to be a constant, constant fold it so that we know10432// that the generated intrinsic gets a ConstantInt.10433std::optional<llvm::APSInt> Result =10434E->getArg(i)->getIntegerConstantExpr(getContext());10435assert(Result && "Expected argument to be a constant");1043610437// Immediates for SVE llvm intrinsics are always 32bit. 
We can safely10438// truncate because the immediate has been range checked and no valid10439// immediate requires more than a handful of bits.10440*Result = Result->extOrTrunc(32);10441Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result));10442continue;10443}1044410445if (IsTupleGetOrSet || !isa<ScalableVectorType>(Arg->getType())) {10446Ops.push_back(Arg);10447continue;10448}1044910450auto *VTy = cast<ScalableVectorType>(Arg->getType());10451unsigned MinElts = VTy->getMinNumElements();10452bool IsPred = VTy->getElementType()->isIntegerTy(1);10453unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128);1045410455if (N == 1) {10456Ops.push_back(Arg);10457continue;10458}1045910460for (unsigned I = 0; I < N; ++I) {10461Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N);10462auto *NewVTy =10463ScalableVectorType::get(VTy->getElementType(), MinElts / N);10464Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx));10465}10466}10467}1046810469Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,10470const CallExpr *E) {10471llvm::Type *Ty = ConvertType(E->getType());10472if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 &&10473BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) {10474Value *Val = EmitScalarExpr(E->getArg(0));10475return EmitSVEReinterpret(Val, Ty);10476}1047710478auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID,10479AArch64SVEIntrinsicsProvenSorted);1048010481llvm::SmallVector<Value *, 4> Ops;10482SVETypeFlags TypeFlags(Builtin->TypeModifier);10483GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);1048410485if (TypeFlags.isLoad())10486return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic,10487TypeFlags.isZExtReturn());10488else if (TypeFlags.isStore())10489return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic);10490else if (TypeFlags.isGatherLoad())10491return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);10492else if 
(TypeFlags.isScatterStore())
    return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isPrefetch())
    return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isGatherPrefetch())
    return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isStructLoad())
    return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isStructStore())
    return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
  else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet())
    return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops);
  else if (TypeFlags.isTupleCreate())
    return EmitSVETupleCreate(TypeFlags, Ty, Ops);
  else if (TypeFlags.isUndef())
    return UndefValue::get(Ty);
  else if (Builtin->LLVMIntrinsic != 0) {
    // Generic path: the table entry names an LLVM intrinsic. Adjust the
    // operand list as requested by the type flags, then emit a single call.
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
      InsertExplicitZeroOperand(Builder, Ty, Ops);

    if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp)
      InsertExplicitUndefOperand(Builder, Ty, Ops);

    // Some ACLE builtins leave out the argument to specify the predicate
    // pattern, which is expected to be expanded to an SV_ALL pattern.
    if (TypeFlags.isAppendSVALL())
      Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31));
    if (TypeFlags.isInsertOp1SVALL())
      Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31));

    // Predicates must match the main datatype.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
        if (PredTy->getElementType()->isIntegerTy(1))
          Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));

    // Splat scalar operand to vector (intrinsics with _n infix)
    if (TypeFlags.hasSplatOperand()) {
      unsigned OpNo = TypeFlags.getSplatOperand();
      Ops[OpNo] = EmitSVEDupX(Ops[OpNo]);
    }

    // At most one of the operand reversals below is applied; the two
    // "MergeAny" variants only apply when the merge type is MergeAny.
    if (TypeFlags.isReverseCompare())
      std::swap(Ops[1], Ops[2]);
    else if (TypeFlags.isReverseUSDOT())
      std::swap(Ops[1], Ops[2]);
    else if (TypeFlags.isReverseMergeAnyBinOp() &&
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
      std::swap(Ops[1], Ops[2]);
    else if (TypeFlags.isReverseMergeAnyAccOp() &&
             TypeFlags.getMergeType() == SVETypeFlags::MergeAny)
      std::swap(Ops[1], Ops[3]);

    // Predicated intrinsics with _z suffix need a select w/ zeroinitializer.
    if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) {
      llvm::Type *OpndTy = Ops[1]->getType();
      auto *SplatZero = Constant::getNullValue(OpndTy);
      Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero);
    }

    Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic,
                                   getSVEOverloadTypes(TypeFlags, Ty, Ops));
    Value *Call = Builder.CreateCall(F, Ops);

    // Predicate results must be converted to svbool_t.
    if (auto PredTy = dyn_cast<llvm::VectorType>(Call->getType()))
      if (PredTy->getScalarType()->isIntegerTy(1))
        Call = EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));

    return FormSVEBuiltinResult(Call);
  }

  // Only builtins whose table entry has no LLVM intrinsic (LLVMIntrinsic == 0)
  // and that matched none of the flag checks above reach this switch; they
  // are expanded by hand.
  switch (BuiltinID) {
  default:
    return nullptr;

  case SVE::BI__builtin_sve_svreinterpret_b: {
    auto SVCountTy =
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    return Builder.CreateCall(CastFromSVCountF, Ops[0]);
  }
  case SVE::BI__builtin_sve_svreinterpret_c: {
    auto SVCountTy =
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);
    return Builder.CreateCall(CastToSVCountF, Ops[0]);
  }

  case SVE::BI__builtin_sve_svpsel_lane_b8:
  case SVE::BI__builtin_sve_svpsel_lane_b16:
  case SVE::BI__builtin_sve_svpsel_lane_b32:
  case SVE::BI__builtin_sve_svpsel_lane_b64:
  case SVE::BI__builtin_sve_svpsel_lane_c8:
  case SVE::BI__builtin_sve_svpsel_lane_c16:
  case SVE::BI__builtin_sve_svpsel_lane_c32:
  case SVE::BI__builtin_sve_svpsel_lane_c64: {
    // When the first operand is an "aarch64.svcount" value it is converted to
    // svbool before the psel call and the result is converted back afterwards.
    bool IsSVCount = isa<TargetExtType>(Ops[0]->getType());
    assert(((!IsSVCount || cast<TargetExtType>(Ops[0]->getType())->getName() ==
                               "aarch64.svcount")) &&
           "Unexpected TargetExtType");
    auto SVCountTy =
        llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount");
    Function *CastFromSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy);
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy);

    auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier));
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy);
    llvm::Value *Ops0 =
        IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0];
    llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy);
    llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]});
    return IsSVCount ? Builder.CreateCall(CastToSVCountF, PSel) : PSel;
  }
  case SVE::BI__builtin_sve_svmov_b_z: {
    // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]});
  }

  case SVE::BI__builtin_sve_svnot_b_z: {
    // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg)
    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    llvm::Type* OverloadedTy = getSVEType(TypeFlags);
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy);
    return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]});
  }

  case SVE::BI__builtin_sve_svmovlb_u16:
  case SVE::BI__builtin_sve_svmovlb_u32:
  case SVE::BI__builtin_sve_svmovlb_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb);

  case SVE::BI__builtin_sve_svmovlb_s16:
  case SVE::BI__builtin_sve_svmovlb_s32:
  case SVE::BI__builtin_sve_svmovlb_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb);

  case SVE::BI__builtin_sve_svmovlt_u16:
  case SVE::BI__builtin_sve_svmovlt_u32:
  case SVE::BI__builtin_sve_svmovlt_u64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt);

  case SVE::BI__builtin_sve_svmovlt_s16:
  case SVE::BI__builtin_sve_svmovlt_s32:
  case SVE::BI__builtin_sve_svmovlt_s64:
    return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt);

  case SVE::BI__builtin_sve_svpmullt_u16:
  case SVE::BI__builtin_sve_svpmullt_u64:
  case SVE::BI__builtin_sve_svpmullt_n_u16:
  case SVE::BI__builtin_sve_svpmullt_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair);

  case SVE::BI__builtin_sve_svpmullb_u16:
  case SVE::BI__builtin_sve_svpmullb_u64:
  case SVE::BI__builtin_sve_svpmullb_n_u16:
  case SVE::BI__builtin_sve_svpmullb_n_u64:
    return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair);

  case SVE::BI__builtin_sve_svdup_n_b8:
  case SVE::BI__builtin_sve_svdup_n_b16:
  case SVE::BI__builtin_sve_svdup_n_b32:
  case SVE::BI__builtin_sve_svdup_n_b64: {
    // Compare the scalar operand against zero, splat the i1 result and cast
    // the splat to the builtin's result type.
    Value *CmpNE =
        Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType()));
    llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags);
    Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy);
    return EmitSVEPredicateCast(Dup, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svdupq_n_b8:
  case SVE::BI__builtin_sve_svdupq_n_b16:
  case SVE::BI__builtin_sve_svdupq_n_b32:
  case SVE::BI__builtin_sve_svdupq_n_b64:
  case SVE::BI__builtin_sve_svdupq_n_u8:
  case SVE::BI__builtin_sve_svdupq_n_s8:
  case SVE::BI__builtin_sve_svdupq_n_u64:
  case SVE::BI__builtin_sve_svdupq_n_f64:
  case SVE::BI__builtin_sve_svdupq_n_s64:
  case SVE::BI__builtin_sve_svdupq_n_u16:
  case SVE::BI__builtin_sve_svdupq_n_f16:
  case SVE::BI__builtin_sve_svdupq_n_bf16:
  case SVE::BI__builtin_sve_svdupq_n_s16:
  case SVE::BI__builtin_sve_svdupq_n_u32:
  case SVE::BI__builtin_sve_svdupq_n_f32:
  case SVE::BI__builtin_sve_svdupq_n_s32: {
    // These builtins are implemented by packing the operands into a vector
    // with BuildVector, inserting that vector at element 0 of a poison
    // scalable vector, and calling aarch64_sve_dupq_lane with lane 0.
    // NOTE(review): an earlier version of this comment described storing the
    // elements to an array and using ld1rq; the code below does not do that.
    unsigned NumOpnds = Ops.size();

    bool IsBoolTy =
        cast<llvm::VectorType>(Ty)->getElementType()->isIntegerTy(1);

    // For svdupq_n_b* the element type is an integer of width
    // SVEBitsPerBlock / NumOpnds, so that the compare can use the width that
    // is natural for the expected number of predicate lanes.
    llvm::Type *EltTy = Ops[0]->getType();
    if (IsBoolTy)
      EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds);

    SmallVector<llvm::Value *, 16> VecOps;
    for (unsigned I = 0; I < NumOpnds; ++I)
        VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy));
    Value *Vec = BuildVector(VecOps);

    llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy);
    Value *InsertSubVec = Builder.CreateInsertVector(
        OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0));

    Function *F =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy);
    Value *DupQLane =
        Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)});

    if (!IsBoolTy)
      return DupQLane;

    SVETypeFlags TypeFlags(Builtin->TypeModifier);
    Value *Pred = EmitSVEAllTruePred(TypeFlags);

    // For svdupq_n_b* we need to add an additional 'cmpne' with '0'.
    // The plain cmpne is used for two operands, cmpne_wide otherwise.
    F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne
                                       : Intrinsic::aarch64_sve_cmpne_wide,
                         OverloadedTy);
    Value *Call = Builder.CreateCall(
        F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))});
    return EmitSVEPredicateCast(Call, cast<llvm::ScalableVectorType>(Ty));
  }

  case SVE::BI__builtin_sve_svpfalse_b:
    return ConstantInt::getFalse(Ty);

  case SVE::BI__builtin_sve_svpfalse_c: {
    // An all-false svbool constant converted to the result type.
    auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16);
    Function *CastToSVCountF =
        CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty);
    return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy));
  }

  case SVE::BI__builtin_sve_svlen_bf16:
  case SVE::BI__builtin_sve_svlen_f16:
  case SVE::BI__builtin_sve_svlen_f32:
  case SVE::BI__builtin_sve_svlen_f64:
  case SVE::BI__builtin_sve_svlen_s8:
  case SVE::BI__builtin_sve_svlen_s16:
  case SVE::BI__builtin_sve_svlen_s32:
  case SVE::BI__builtin_sve_svlen_s64:
  case SVE::BI__builtin_sve_svlen_u8:
  case SVE::BI__builtin_sve_svlen_u16:
  case SVE::BI__builtin_sve_svlen_u32:
  case SVE::BI__builtin_sve_svlen_u64: {
    // Result = known-minimum element count of the vector type * llvm.vscale.
    SVETypeFlags TF(Builtin->TypeModifier);
    auto VTy = cast<llvm::VectorType>(getSVEType(TF));
    auto *NumEls =
        llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue());

    Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty);
    return Builder.CreateMul(NumEls, Builder.CreateCall(F));
  }

  case SVE::BI__builtin_sve_svtbl2_u8:
  case SVE::BI__builtin_sve_svtbl2_s8:
  case SVE::BI__builtin_sve_svtbl2_u16:
  case SVE::BI__builtin_sve_svtbl2_s16:
  case SVE::BI__builtin_sve_svtbl2_u32:
  case SVE::BI__builtin_sve_svtbl2_s32:
  case SVE::BI__builtin_sve_svtbl2_u64:
  case SVE::BI__builtin_sve_svtbl2_s64:
  case SVE::BI__builtin_sve_svtbl2_f16:
  case SVE::BI__builtin_sve_svtbl2_bf16:
  case SVE::BI__builtin_sve_svtbl2_f32:
  case SVE::BI__builtin_sve_svtbl2_f64: {
    SVETypeFlags TF(Builtin->TypeModifier);
    auto VTy = cast<llvm::ScalableVectorType>(getSVEType(TF));
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy);
    return Builder.CreateCall(F, Ops);
  }

  case SVE::BI__builtin_sve_svset_neonq_s8:
  case SVE::BI__builtin_sve_svset_neonq_s16:
  case SVE::BI__builtin_sve_svset_neonq_s32:
  case SVE::BI__builtin_sve_svset_neonq_s64:
  case SVE::BI__builtin_sve_svset_neonq_u8:
  case SVE::BI__builtin_sve_svset_neonq_u16:
  case SVE::BI__builtin_sve_svset_neonq_u32:
  case SVE::BI__builtin_sve_svset_neonq_u64:
  case SVE::BI__builtin_sve_svset_neonq_f16:
  case SVE::BI__builtin_sve_svset_neonq_f32:
  case SVE::BI__builtin_sve_svset_neonq_f64:
  case SVE::BI__builtin_sve_svset_neonq_bf16: {
    // Insert the second operand into the first at element index 0.
    return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0));
  }

  case SVE::BI__builtin_sve_svget_neonq_s8:
  case SVE::BI__builtin_sve_svget_neonq_s16:
  case SVE::BI__builtin_sve_svget_neonq_s32:
  case SVE::BI__builtin_sve_svget_neonq_s64:
  case SVE::BI__builtin_sve_svget_neonq_u8:
  case SVE::BI__builtin_sve_svget_neonq_u16:
  case SVE::BI__builtin_sve_svget_neonq_u32:
  case
SVE::BI__builtin_sve_svget_neonq_u64:10800case SVE::BI__builtin_sve_svget_neonq_f16:10801case SVE::BI__builtin_sve_svget_neonq_f32:10802case SVE::BI__builtin_sve_svget_neonq_f64:10803case SVE::BI__builtin_sve_svget_neonq_bf16: {10804return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0));10805}1080610807case SVE::BI__builtin_sve_svdup_neonq_s8:10808case SVE::BI__builtin_sve_svdup_neonq_s16:10809case SVE::BI__builtin_sve_svdup_neonq_s32:10810case SVE::BI__builtin_sve_svdup_neonq_s64:10811case SVE::BI__builtin_sve_svdup_neonq_u8:10812case SVE::BI__builtin_sve_svdup_neonq_u16:10813case SVE::BI__builtin_sve_svdup_neonq_u32:10814case SVE::BI__builtin_sve_svdup_neonq_u64:10815case SVE::BI__builtin_sve_svdup_neonq_f16:10816case SVE::BI__builtin_sve_svdup_neonq_f32:10817case SVE::BI__builtin_sve_svdup_neonq_f64:10818case SVE::BI__builtin_sve_svdup_neonq_bf16: {10819Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0],10820Builder.getInt64(0));10821return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty},10822{Insert, Builder.getInt64(0)});10823}10824}1082510826/// Should not happen10827return nullptr;10828}1082910830static void swapCommutativeSMEOperands(unsigned BuiltinID,10831SmallVectorImpl<Value *> &Ops) {10832unsigned MultiVec;10833switch (BuiltinID) {10834default:10835return;10836case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1:10837MultiVec = 1;10838break;10839case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2:10840case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2:10841MultiVec = 2;10842break;10843case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4:10844case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4:10845MultiVec = 4;10846break;10847}1084810849if (MultiVec > 0)10850for (unsigned I = 0; I < MultiVec; ++I)10851std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]);10852}1085310854Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,10855const CallExpr *E) {10856auto *Builtin = 
findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID,10857AArch64SMEIntrinsicsProvenSorted);1085810859llvm::SmallVector<Value *, 4> Ops;10860SVETypeFlags TypeFlags(Builtin->TypeModifier);10861GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags);1086210863if (TypeFlags.isLoad() || TypeFlags.isStore())10864return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic);10865else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA())10866return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic);10867else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za ||10868BuiltinID == SME::BI__builtin_sme_svzero_za)10869return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic);10870else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za ||10871BuiltinID == SME::BI__builtin_sme_svstr_vnum_za ||10872BuiltinID == SME::BI__builtin_sme_svldr_za ||10873BuiltinID == SME::BI__builtin_sme_svstr_za)10874return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);1087510876// Handle builtins which require their multi-vector operands to be swapped10877swapCommutativeSMEOperands(BuiltinID, Ops);1087810879// Should not happen!10880if (Builtin->LLVMIntrinsic == 0)10881return nullptr;1088210883// Predicates must match the main datatype.10884for (unsigned i = 0, e = Ops.size(); i != e; ++i)10885if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))10886if (PredTy->getElementType()->isIntegerTy(1))10887Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));1088810889Function *F =10890TypeFlags.isOverloadNone()10891? 
CGM.getIntrinsic(Builtin->LLVMIntrinsic)10892: CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)});10893Value *Call = Builder.CreateCall(F, Ops);1089410895return FormSVEBuiltinResult(Call);10896}1089710898Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,10899const CallExpr *E,10900llvm::Triple::ArchType Arch) {10901if (BuiltinID >= clang::AArch64::FirstSVEBuiltin &&10902BuiltinID <= clang::AArch64::LastSVEBuiltin)10903return EmitAArch64SVEBuiltinExpr(BuiltinID, E);1090410905if (BuiltinID >= clang::AArch64::FirstSMEBuiltin &&10906BuiltinID <= clang::AArch64::LastSMEBuiltin)10907return EmitAArch64SMEBuiltinExpr(BuiltinID, E);1090810909if (BuiltinID == Builtin::BI__builtin_cpu_supports)10910return EmitAArch64CpuSupports(E);1091110912unsigned HintID = static_cast<unsigned>(-1);10913switch (BuiltinID) {10914default: break;10915case clang::AArch64::BI__builtin_arm_nop:10916HintID = 0;10917break;10918case clang::AArch64::BI__builtin_arm_yield:10919case clang::AArch64::BI__yield:10920HintID = 1;10921break;10922case clang::AArch64::BI__builtin_arm_wfe:10923case clang::AArch64::BI__wfe:10924HintID = 2;10925break;10926case clang::AArch64::BI__builtin_arm_wfi:10927case clang::AArch64::BI__wfi:10928HintID = 3;10929break;10930case clang::AArch64::BI__builtin_arm_sev:10931case clang::AArch64::BI__sev:10932HintID = 4;10933break;10934case clang::AArch64::BI__builtin_arm_sevl:10935case clang::AArch64::BI__sevl:10936HintID = 5;10937break;10938}1093910940if (HintID != static_cast<unsigned>(-1)) {10941Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);10942return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));10943}1094410945if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) {10946Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break);10947llvm::Value *Arg = EmitScalarExpr(E->getArg(0));10948return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty));10949}1095010951if (BuiltinID == 
clang::AArch64::BI__builtin_arm_get_sme_state) {10952// Create call to __arm_sme_state and store the results to the two pointers.10953CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction(10954llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {},10955false),10956"__arm_sme_state"));10957auto Attrs = AttributeList().addFnAttribute(getLLVMContext(),10958"aarch64_pstate_sm_compatible");10959CI->setAttributes(Attrs);10960CI->setCallingConv(10961llvm::CallingConv::10962AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2);10963Builder.CreateStore(Builder.CreateExtractValue(CI, 0),10964EmitPointerWithAlignment(E->getArg(0)));10965return Builder.CreateStore(Builder.CreateExtractValue(CI, 1),10966EmitPointerWithAlignment(E->getArg(1)));10967}1096810969if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) {10970assert((getContext().getTypeSize(E->getType()) == 32) &&10971"rbit of unusual size!");10972llvm::Value *Arg = EmitScalarExpr(E->getArg(0));10973return Builder.CreateCall(10974CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");10975}10976if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) {10977assert((getContext().getTypeSize(E->getType()) == 64) &&10978"rbit of unusual size!");10979llvm::Value *Arg = EmitScalarExpr(E->getArg(0));10980return Builder.CreateCall(10981CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");10982}1098310984if (BuiltinID == clang::AArch64::BI__builtin_arm_clz ||10985BuiltinID == clang::AArch64::BI__builtin_arm_clz64) {10986llvm::Value *Arg = EmitScalarExpr(E->getArg(0));10987Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType());10988Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});10989if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64)10990Res = Builder.CreateTrunc(Res, Builder.getInt32Ty());10991return Res;10992}1099310994if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) {10995llvm::Value *Arg = EmitScalarExpr(E->getArg(0));10996return 
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg,10997"cls");10998}10999if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) {11000llvm::Value *Arg = EmitScalarExpr(E->getArg(0));11001return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg,11002"cls");11003}1100411005if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf ||11006BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) {11007llvm::Value *Arg = EmitScalarExpr(E->getArg(0));11008llvm::Type *Ty = Arg->getType();11009return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty),11010Arg, "frint32z");11011}1101211013if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf ||11014BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) {11015llvm::Value *Arg = EmitScalarExpr(E->getArg(0));11016llvm::Type *Ty = Arg->getType();11017return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty),11018Arg, "frint64z");11019}1102011021if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf ||11022BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) {11023llvm::Value *Arg = EmitScalarExpr(E->getArg(0));11024llvm::Type *Ty = Arg->getType();11025return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty),11026Arg, "frint32x");11027}1102811029if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf ||11030BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) {11031llvm::Value *Arg = EmitScalarExpr(E->getArg(0));11032llvm::Type *Ty = Arg->getType();11033return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty),11034Arg, "frint64x");11035}1103611037if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) {11038assert((getContext().getTypeSize(E->getType()) == 32) &&11039"__jcvt of unusual size!");11040llvm::Value *Arg = EmitScalarExpr(E->getArg(0));11041return Builder.CreateCall(11042CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);11043}1104411045if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b ||11046BuiltinID 
== clang::AArch64::BI__builtin_arm_st64b ||11047BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ||11048BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) {11049llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0));11050llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1));1105111052if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) {11053// Load from the address via an LLVM intrinsic, receiving a11054// tuple of 8 i64 words, and store each one to ValPtr.11055Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b);11056llvm::Value *Val = Builder.CreateCall(F, MemAddr);11057llvm::Value *ToRet;11058for (size_t i = 0; i < 8; i++) {11059llvm::Value *ValOffsetPtr =11060Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));11061Address Addr =11062Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));11063ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr);11064}11065return ToRet;11066} else {11067// Load 8 i64 words from ValPtr, and store them to the address11068// via an LLVM intrinsic.11069SmallVector<llvm::Value *, 9> Args;11070Args.push_back(MemAddr);11071for (size_t i = 0; i < 8; i++) {11072llvm::Value *ValOffsetPtr =11073Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i));11074Address Addr =11075Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8));11076Args.push_back(Builder.CreateLoad(Addr));11077}1107811079auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b11080? Intrinsic::aarch64_st64b11081: BuiltinID == clang::AArch64::BI__builtin_arm_st64bv11082? Intrinsic::aarch64_st64bv11083: Intrinsic::aarch64_st64bv0);11084Function *F = CGM.getIntrinsic(Intr);11085return Builder.CreateCall(F, Args);11086}11087}1108811089if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ||11090BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) {1109111092auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr11093? 
Intrinsic::aarch64_rndr11094: Intrinsic::aarch64_rndrrs);11095Function *F = CGM.getIntrinsic(Intr);11096llvm::Value *Val = Builder.CreateCall(F);11097Value *RandomValue = Builder.CreateExtractValue(Val, 0);11098Value *Status = Builder.CreateExtractValue(Val, 1);1109911100Address MemAddress = EmitPointerWithAlignment(E->getArg(0));11101Builder.CreateStore(RandomValue, MemAddress);11102Status = Builder.CreateZExt(Status, Int32Ty);11103return Status;11104}1110511106if (BuiltinID == clang::AArch64::BI__clear_cache) {11107assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");11108const FunctionDecl *FD = E->getDirectCallee();11109Value *Ops[2];11110for (unsigned i = 0; i < 2; i++)11111Ops[i] = EmitScalarExpr(E->getArg(i));11112llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());11113llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);11114StringRef Name = FD->getName();11115return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);11116}1111711118if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||11119BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) &&11120getContext().getTypeSize(E->getType()) == 128) {11121Function *F =11122CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex11123? 
Intrinsic::aarch64_ldaxp11124: Intrinsic::aarch64_ldxp);1112511126Value *LdPtr = EmitScalarExpr(E->getArg(0));11127Value *Val = Builder.CreateCall(F, LdPtr, "ldxp");1112811129Value *Val0 = Builder.CreateExtractValue(Val, 1);11130Value *Val1 = Builder.CreateExtractValue(Val, 0);11131llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);11132Val0 = Builder.CreateZExt(Val0, Int128Ty);11133Val1 = Builder.CreateZExt(Val1, Int128Ty);1113411135Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);11136Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);11137Val = Builder.CreateOr(Val, Val1);11138return Builder.CreateBitCast(Val, ConvertType(E->getType()));11139} else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex ||11140BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) {11141Value *LoadAddr = EmitScalarExpr(E->getArg(0));1114211143QualType Ty = E->getType();11144llvm::Type *RealResTy = ConvertType(Ty);11145llvm::Type *IntTy =11146llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));1114711148Function *F =11149CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex11150? Intrinsic::aarch64_ldaxr11151: Intrinsic::aarch64_ldxr,11152UnqualPtrTy);11153CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr");11154Val->addParamAttr(111550, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy));1115611157if (RealResTy->isPointerTy())11158return Builder.CreateIntToPtr(Val, RealResTy);1115911160llvm::Type *IntResTy = llvm::IntegerType::get(11161getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));11162return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy),11163RealResTy);11164}1116511166if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex ||11167BuiltinID == clang::AArch64::BI__builtin_arm_stlex) &&11168getContext().getTypeSize(E->getArg(0)->getType()) == 128) {11169Function *F =11170CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex11171? 
Intrinsic::aarch64_stlxp11172: Intrinsic::aarch64_stxp);11173llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);1117411175Address Tmp = CreateMemTemp(E->getArg(0)->getType());11176EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);1117711178Tmp = Tmp.withElementType(STy);11179llvm::Value *Val = Builder.CreateLoad(Tmp);1118011181Value *Arg0 = Builder.CreateExtractValue(Val, 0);11182Value *Arg1 = Builder.CreateExtractValue(Val, 1);11183Value *StPtr = EmitScalarExpr(E->getArg(1));11184return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");11185}1118611187if (BuiltinID == clang::AArch64::BI__builtin_arm_strex ||11188BuiltinID == clang::AArch64::BI__builtin_arm_stlex) {11189Value *StoreVal = EmitScalarExpr(E->getArg(0));11190Value *StoreAddr = EmitScalarExpr(E->getArg(1));1119111192QualType Ty = E->getArg(0)->getType();11193llvm::Type *StoreTy =11194llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty));1119511196if (StoreVal->getType()->isPointerTy())11197StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);11198else {11199llvm::Type *IntTy = llvm::IntegerType::get(11200getLLVMContext(),11201CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));11202StoreVal = Builder.CreateBitCast(StoreVal, IntTy);11203StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);11204}1120511206Function *F =11207CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex11208? 
Intrinsic::aarch64_stlxr11209: Intrinsic::aarch64_stxr,11210StoreAddr->getType());11211CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");11212CI->addParamAttr(112131, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy));11214return CI;11215}1121611217if (BuiltinID == clang::AArch64::BI__getReg) {11218Expr::EvalResult Result;11219if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))11220llvm_unreachable("Sema will ensure that the parameter is constant");1122111222llvm::APSInt Value = Result.Val.getInt();11223LLVMContext &Context = CGM.getLLVMContext();11224std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10);1122511226llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};11227llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);11228llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);1122911230llvm::Function *F =11231CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});11232return Builder.CreateCall(F, Metadata);11233}1123411235if (BuiltinID == clang::AArch64::BI__break) {11236Expr::EvalResult Result;11237if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))11238llvm_unreachable("Sema will ensure that the parameter is constant");1123911240llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break);11241return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))});11242}1124311244if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) {11245Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);11246return Builder.CreateCall(F);11247}1124811249if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier)11250return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,11251llvm::SyncScope::SingleThread);1125211253// CRC3211254Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;11255switch (BuiltinID) {11256case clang::AArch64::BI__builtin_arm_crc32b:11257CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;11258case 
clang::AArch64::BI__builtin_arm_crc32cb:11259CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;11260case clang::AArch64::BI__builtin_arm_crc32h:11261CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;11262case clang::AArch64::BI__builtin_arm_crc32ch:11263CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;11264case clang::AArch64::BI__builtin_arm_crc32w:11265CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;11266case clang::AArch64::BI__builtin_arm_crc32cw:11267CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;11268case clang::AArch64::BI__builtin_arm_crc32d:11269CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;11270case clang::AArch64::BI__builtin_arm_crc32cd:11271CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;11272}1127311274if (CRCIntrinsicID != Intrinsic::not_intrinsic) {11275Value *Arg0 = EmitScalarExpr(E->getArg(0));11276Value *Arg1 = EmitScalarExpr(E->getArg(1));11277Function *F = CGM.getIntrinsic(CRCIntrinsicID);1127811279llvm::Type *DataTy = F->getFunctionType()->getParamType(1);11280Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);1128111282return Builder.CreateCall(F, {Arg0, Arg1});11283}1128411285// Memory Operations (MOPS)11286if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) {11287Value *Dst = EmitScalarExpr(E->getArg(0));11288Value *Val = EmitScalarExpr(E->getArg(1));11289Value *Size = EmitScalarExpr(E->getArg(2));11290Dst = Builder.CreatePointerCast(Dst, Int8PtrTy);11291Val = Builder.CreateTrunc(Val, Int8Ty);11292Size = Builder.CreateIntCast(Size, Int64Ty, false);11293return Builder.CreateCall(11294CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size});11295}1129611297// Memory Tagging Extensions (MTE) Intrinsics11298Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;11299switch (BuiltinID) {11300case clang::AArch64::BI__builtin_arm_irg:11301MTEIntrinsicID = Intrinsic::aarch64_irg; break;11302case clang::AArch64::BI__builtin_arm_addg:11303MTEIntrinsicID = Intrinsic::aarch64_addg; break;11304case 
clang::AArch64::BI__builtin_arm_gmi:11305MTEIntrinsicID = Intrinsic::aarch64_gmi; break;11306case clang::AArch64::BI__builtin_arm_ldg:11307MTEIntrinsicID = Intrinsic::aarch64_ldg; break;11308case clang::AArch64::BI__builtin_arm_stg:11309MTEIntrinsicID = Intrinsic::aarch64_stg; break;11310case clang::AArch64::BI__builtin_arm_subp:11311MTEIntrinsicID = Intrinsic::aarch64_subp; break;11312}1131311314if (MTEIntrinsicID != Intrinsic::not_intrinsic) {11315llvm::Type *T = ConvertType(E->getType());1131611317if (MTEIntrinsicID == Intrinsic::aarch64_irg) {11318Value *Pointer = EmitScalarExpr(E->getArg(0));11319Value *Mask = EmitScalarExpr(E->getArg(1));1132011321Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);11322Mask = Builder.CreateZExt(Mask, Int64Ty);11323Value *RV = Builder.CreateCall(11324CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});11325return Builder.CreatePointerCast(RV, T);11326}11327if (MTEIntrinsicID == Intrinsic::aarch64_addg) {11328Value *Pointer = EmitScalarExpr(E->getArg(0));11329Value *TagOffset = EmitScalarExpr(E->getArg(1));1133011331Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);11332TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);11333Value *RV = Builder.CreateCall(11334CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});11335return Builder.CreatePointerCast(RV, T);11336}11337if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {11338Value *Pointer = EmitScalarExpr(E->getArg(0));11339Value *ExcludedMask = EmitScalarExpr(E->getArg(1));1134011341ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);11342Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);11343return Builder.CreateCall(11344CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});11345}11346// Although it is possible to supply a different return11347// address (first arg) to this intrinsic, for now we set11348// return address same as input address.11349if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {11350Value *TagAddress = 
EmitScalarExpr(E->getArg(0));11351TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);11352Value *RV = Builder.CreateCall(11353CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});11354return Builder.CreatePointerCast(RV, T);11355}11356// Although it is possible to supply a different tag (to set)11357// to this intrinsic (as first arg), for now we supply11358// the tag that is in input address arg (common use case).11359if (MTEIntrinsicID == Intrinsic::aarch64_stg) {11360Value *TagAddress = EmitScalarExpr(E->getArg(0));11361TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);11362return Builder.CreateCall(11363CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});11364}11365if (MTEIntrinsicID == Intrinsic::aarch64_subp) {11366Value *PointerA = EmitScalarExpr(E->getArg(0));11367Value *PointerB = EmitScalarExpr(E->getArg(1));11368PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);11369PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);11370return Builder.CreateCall(11371CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});11372}11373}1137411375if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||11376BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||11377BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||11378BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||11379BuiltinID == clang::AArch64::BI__builtin_arm_wsr ||11380BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 ||11381BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 ||11382BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) {1138311384SpecialRegisterAccessKind AccessKind = Write;11385if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||11386BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 ||11387BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||11388BuiltinID == clang::AArch64::BI__builtin_arm_rsrp)11389AccessKind = VolatileRead;1139011391bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp ||11392BuiltinID == 
clang::AArch64::BI__builtin_arm_wsrp;1139311394bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr ||11395BuiltinID == clang::AArch64::BI__builtin_arm_wsr;1139611397bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 ||11398BuiltinID == clang::AArch64::BI__builtin_arm_wsr128;1139911400llvm::Type *ValueType;11401llvm::Type *RegisterType = Int64Ty;11402if (Is32Bit) {11403ValueType = Int32Ty;11404} else if (Is128Bit) {11405llvm::Type *Int128Ty =11406llvm::IntegerType::getInt128Ty(CGM.getLLVMContext());11407ValueType = Int128Ty;11408RegisterType = Int128Ty;11409} else if (IsPointerBuiltin) {11410ValueType = VoidPtrTy;11411} else {11412ValueType = Int64Ty;11413};1141411415return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType,11416AccessKind);11417}1141811419if (BuiltinID == clang::AArch64::BI_ReadStatusReg ||11420BuiltinID == clang::AArch64::BI_WriteStatusReg) {11421LLVMContext &Context = CGM.getLLVMContext();1142211423unsigned SysReg =11424E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();1142511426std::string SysRegStr;11427llvm::raw_string_ostream(SysRegStr) <<11428((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<11429((SysReg >> 11) & 7) << ":" <<11430((SysReg >> 7) & 15) << ":" <<11431((SysReg >> 3) & 15) << ":" <<11432( SysReg & 7);1143311434llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };11435llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);11436llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);1143711438llvm::Type *RegisterType = Int64Ty;11439llvm::Type *Types[] = { RegisterType };1144011441if (BuiltinID == clang::AArch64::BI_ReadStatusReg) {11442llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);1144311444return Builder.CreateCall(F, Metadata);11445}1144611447llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);11448llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));1144911450return Builder.CreateCall(F, { Metadata, 
ArgValue });11451}1145211453if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) {11454llvm::Function *F =11455CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);11456return Builder.CreateCall(F);11457}1145811459if (BuiltinID == clang::AArch64::BI__builtin_sponentry) {11460llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy);11461return Builder.CreateCall(F);11462}1146311464if (BuiltinID == clang::AArch64::BI__mulh ||11465BuiltinID == clang::AArch64::BI__umulh) {11466llvm::Type *ResType = ConvertType(E->getType());11467llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);1146811469bool IsSigned = BuiltinID == clang::AArch64::BI__mulh;11470Value *LHS =11471Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned);11472Value *RHS =11473Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned);1147411475Value *MulResult, *HigherBits;11476if (IsSigned) {11477MulResult = Builder.CreateNSWMul(LHS, RHS);11478HigherBits = Builder.CreateAShr(MulResult, 64);11479} else {11480MulResult = Builder.CreateNUWMul(LHS, RHS);11481HigherBits = Builder.CreateLShr(MulResult, 64);11482}11483HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);1148411485return HigherBits;11486}1148711488if (BuiltinID == AArch64::BI__writex18byte ||11489BuiltinID == AArch64::BI__writex18word ||11490BuiltinID == AArch64::BI__writex18dword ||11491BuiltinID == AArch64::BI__writex18qword) {11492// Read x18 as i8*11493LLVMContext &Context = CGM.getLLVMContext();11494llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};11495llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);11496llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);11497llvm::Function *F =11498CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});11499llvm::Value *X18 = Builder.CreateCall(F, Metadata);11500X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);1150111502// Store val at x18 + offset11503Value *Offset 
= Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);11504Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);11505Value *Val = EmitScalarExpr(E->getArg(1));11506StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One());11507return Store;11508}1150911510if (BuiltinID == AArch64::BI__readx18byte ||11511BuiltinID == AArch64::BI__readx18word ||11512BuiltinID == AArch64::BI__readx18dword ||11513BuiltinID == AArch64::BI__readx18qword) {11514llvm::Type *IntTy = ConvertType(E->getType());1151511516// Read x18 as i8*11517LLVMContext &Context = CGM.getLLVMContext();11518llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")};11519llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);11520llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);11521llvm::Function *F =11522CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});11523llvm::Value *X18 = Builder.CreateCall(F, Metadata);11524X18 = Builder.CreateIntToPtr(X18, Int8PtrTy);1152511526// Load x18 + offset11527Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty);11528Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset);11529LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One());11530return Load;11531}1153211533if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||11534BuiltinID == AArch64::BI_CopyFloatFromInt32 ||11535BuiltinID == AArch64::BI_CopyInt32FromFloat ||11536BuiltinID == AArch64::BI_CopyInt64FromDouble) {11537Value *Arg = EmitScalarExpr(E->getArg(0));11538llvm::Type *RetTy = ConvertType(E->getType());11539return Builder.CreateBitCast(Arg, RetTy);11540}1154111542if (BuiltinID == AArch64::BI_CountLeadingOnes ||11543BuiltinID == AArch64::BI_CountLeadingOnes64 ||11544BuiltinID == AArch64::BI_CountLeadingZeros ||11545BuiltinID == AArch64::BI_CountLeadingZeros64) {11546Value *Arg = EmitScalarExpr(E->getArg(0));11547llvm::Type *ArgType = Arg->getType();1154811549if (BuiltinID == AArch64::BI_CountLeadingOnes ||11550BuiltinID == 
AArch64::BI_CountLeadingOnes64)11551Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));1155211553Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);11554Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});1155511556if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||11557BuiltinID == AArch64::BI_CountLeadingZeros64)11558Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());11559return Result;11560}1156111562if (BuiltinID == AArch64::BI_CountLeadingSigns ||11563BuiltinID == AArch64::BI_CountLeadingSigns64) {11564Value *Arg = EmitScalarExpr(E->getArg(0));1156511566Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)11567? CGM.getIntrinsic(Intrinsic::aarch64_cls)11568: CGM.getIntrinsic(Intrinsic::aarch64_cls64);1156911570Value *Result = Builder.CreateCall(F, Arg, "cls");11571if (BuiltinID == AArch64::BI_CountLeadingSigns64)11572Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());11573return Result;11574}1157511576if (BuiltinID == AArch64::BI_CountOneBits ||11577BuiltinID == AArch64::BI_CountOneBits64) {11578Value *ArgValue = EmitScalarExpr(E->getArg(0));11579llvm::Type *ArgType = ArgValue->getType();11580Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);1158111582Value *Result = Builder.CreateCall(F, ArgValue);11583if (BuiltinID == AArch64::BI_CountOneBits64)11584Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());11585return Result;11586}1158711588if (BuiltinID == AArch64::BI__prefetch) {11589Value *Address = EmitScalarExpr(E->getArg(0));11590Value *RW = llvm::ConstantInt::get(Int32Ty, 0);11591Value *Locality = ConstantInt::get(Int32Ty, 3);11592Value *Data = llvm::ConstantInt::get(Int32Ty, 1);11593Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());11594return Builder.CreateCall(F, {Address, RW, Locality, Data});11595}1159611597if (BuiltinID == AArch64::BI__hlt) {11598Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt);11599Builder.CreateCall(F, 
{EmitScalarExpr(E->getArg(0))});1160011601// Return 0 for convenience, even though MSVC returns some other undefined11602// value.11603return ConstantInt::get(Builder.getInt32Ty(), 0);11604}1160511606// Handle MSVC intrinsics before argument evaluation to prevent double11607// evaluation.11608if (std::optional<MSVCIntrin> MsvcIntId =11609translateAarch64ToMsvcIntrin(BuiltinID))11610return EmitMSVCBuiltinExpr(*MsvcIntId, E);1161111612// Some intrinsics are equivalent - if they are use the base intrinsic ID.11613auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) {11614return P.first == BuiltinID;11615});11616if (It != end(NEONEquivalentIntrinsicMap))11617BuiltinID = It->second;1161811619// Find out if any arguments are required to be integer constant11620// expressions.11621unsigned ICEArguments = 0;11622ASTContext::GetBuiltinTypeError Error;11623getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);11624assert(Error == ASTContext::GE_None && "Should not codegen an error");1162511626llvm::SmallVector<Value*, 4> Ops;11627Address PtrOp0 = Address::invalid();11628for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {11629if (i == 0) {11630switch (BuiltinID) {11631case NEON::BI__builtin_neon_vld1_v:11632case NEON::BI__builtin_neon_vld1q_v:11633case NEON::BI__builtin_neon_vld1_dup_v:11634case NEON::BI__builtin_neon_vld1q_dup_v:11635case NEON::BI__builtin_neon_vld1_lane_v:11636case NEON::BI__builtin_neon_vld1q_lane_v:11637case NEON::BI__builtin_neon_vst1_v:11638case NEON::BI__builtin_neon_vst1q_v:11639case NEON::BI__builtin_neon_vst1_lane_v:11640case NEON::BI__builtin_neon_vst1q_lane_v:11641case NEON::BI__builtin_neon_vldap1_lane_s64:11642case NEON::BI__builtin_neon_vldap1q_lane_s64:11643case NEON::BI__builtin_neon_vstl1_lane_s64:11644case NEON::BI__builtin_neon_vstl1q_lane_s64:11645// Get the alignment for the argument in addition to the value;11646// we'll use it later.11647PtrOp0 = 
EmitPointerWithAlignment(E->getArg(0));11648Ops.push_back(PtrOp0.emitRawPointer(*this));11649continue;11650}11651}11652Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));11653}1165411655auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap);11656const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap(11657SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);1165811659if (Builtin) {11660Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));11661Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);11662assert(Result && "SISD intrinsic should have been handled");11663return Result;11664}1166511666const Expr *Arg = E->getArg(E->getNumArgs()-1);11667NeonTypeFlags Type(0);11668if (std::optional<llvm::APSInt> Result =11669Arg->getIntegerConstantExpr(getContext()))11670// Determine the type of this overloaded NEON intrinsic.11671Type = NeonTypeFlags(Result->getZExtValue());1167211673bool usgn = Type.isUnsigned();11674bool quad = Type.isQuad();1167511676// Handle non-overloaded intrinsics first.11677switch (BuiltinID) {11678default: break;11679case NEON::BI__builtin_neon_vabsh_f16:11680Ops.push_back(EmitScalarExpr(E->getArg(0)));11681return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");11682case NEON::BI__builtin_neon_vaddq_p128: {11683llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128);11684Ops.push_back(EmitScalarExpr(E->getArg(1)));11685Ops[0] = Builder.CreateBitCast(Ops[0], Ty);11686Ops[1] = Builder.CreateBitCast(Ops[1], Ty);11687Ops[0] = Builder.CreateXor(Ops[0], Ops[1]);11688llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);11689return Builder.CreateBitCast(Ops[0], Int128Ty);11690}11691case NEON::BI__builtin_neon_vldrq_p128: {11692llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);11693Value *Ptr = EmitScalarExpr(E->getArg(0));11694return Builder.CreateAlignedLoad(Int128Ty, Ptr,11695CharUnits::fromQuantity(16));11696}11697case 
NEON::BI__builtin_neon_vstrq_p128: {11698Value *Ptr = Ops[0];11699return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);11700}11701case NEON::BI__builtin_neon_vcvts_f32_u32:11702case NEON::BI__builtin_neon_vcvtd_f64_u64:11703usgn = true;11704[[fallthrough]];11705case NEON::BI__builtin_neon_vcvts_f32_s32:11706case NEON::BI__builtin_neon_vcvtd_f64_s64: {11707Ops.push_back(EmitScalarExpr(E->getArg(0)));11708bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;11709llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;11710llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;11711Ops[0] = Builder.CreateBitCast(Ops[0], InTy);11712if (usgn)11713return Builder.CreateUIToFP(Ops[0], FTy);11714return Builder.CreateSIToFP(Ops[0], FTy);11715}11716case NEON::BI__builtin_neon_vcvth_f16_u16:11717case NEON::BI__builtin_neon_vcvth_f16_u32:11718case NEON::BI__builtin_neon_vcvth_f16_u64:11719usgn = true;11720[[fallthrough]];11721case NEON::BI__builtin_neon_vcvth_f16_s16:11722case NEON::BI__builtin_neon_vcvth_f16_s32:11723case NEON::BI__builtin_neon_vcvth_f16_s64: {11724Ops.push_back(EmitScalarExpr(E->getArg(0)));11725llvm::Type *FTy = HalfTy;11726llvm::Type *InTy;11727if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)11728InTy = Int64Ty;11729else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)11730InTy = Int32Ty;11731else11732InTy = Int16Ty;11733Ops[0] = Builder.CreateBitCast(Ops[0], InTy);11734if (usgn)11735return Builder.CreateUIToFP(Ops[0], FTy);11736return Builder.CreateSIToFP(Ops[0], FTy);11737}11738case NEON::BI__builtin_neon_vcvtah_u16_f16:11739case NEON::BI__builtin_neon_vcvtmh_u16_f16:11740case NEON::BI__builtin_neon_vcvtnh_u16_f16:11741case NEON::BI__builtin_neon_vcvtph_u16_f16:11742case NEON::BI__builtin_neon_vcvth_u16_f16:11743case NEON::BI__builtin_neon_vcvtah_s16_f16:11744case NEON::BI__builtin_neon_vcvtmh_s16_f16:11745case NEON::BI__builtin_neon_vcvtnh_s16_f16:11746case NEON::BI__builtin_neon_vcvtph_s16_f16:11747case 
NEON::BI__builtin_neon_vcvth_s16_f16: {11748unsigned Int;11749llvm::Type* InTy = Int32Ty;11750llvm::Type* FTy = HalfTy;11751llvm::Type *Tys[2] = {InTy, FTy};11752Ops.push_back(EmitScalarExpr(E->getArg(0)));11753switch (BuiltinID) {11754default: llvm_unreachable("missing builtin ID in switch!");11755case NEON::BI__builtin_neon_vcvtah_u16_f16:11756Int = Intrinsic::aarch64_neon_fcvtau; break;11757case NEON::BI__builtin_neon_vcvtmh_u16_f16:11758Int = Intrinsic::aarch64_neon_fcvtmu; break;11759case NEON::BI__builtin_neon_vcvtnh_u16_f16:11760Int = Intrinsic::aarch64_neon_fcvtnu; break;11761case NEON::BI__builtin_neon_vcvtph_u16_f16:11762Int = Intrinsic::aarch64_neon_fcvtpu; break;11763case NEON::BI__builtin_neon_vcvth_u16_f16:11764Int = Intrinsic::aarch64_neon_fcvtzu; break;11765case NEON::BI__builtin_neon_vcvtah_s16_f16:11766Int = Intrinsic::aarch64_neon_fcvtas; break;11767case NEON::BI__builtin_neon_vcvtmh_s16_f16:11768Int = Intrinsic::aarch64_neon_fcvtms; break;11769case NEON::BI__builtin_neon_vcvtnh_s16_f16:11770Int = Intrinsic::aarch64_neon_fcvtns; break;11771case NEON::BI__builtin_neon_vcvtph_s16_f16:11772Int = Intrinsic::aarch64_neon_fcvtps; break;11773case NEON::BI__builtin_neon_vcvth_s16_f16:11774Int = Intrinsic::aarch64_neon_fcvtzs; break;11775}11776Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");11777return Builder.CreateTrunc(Ops[0], Int16Ty);11778}11779case NEON::BI__builtin_neon_vcaleh_f16:11780case NEON::BI__builtin_neon_vcalth_f16:11781case NEON::BI__builtin_neon_vcageh_f16:11782case NEON::BI__builtin_neon_vcagth_f16: {11783unsigned Int;11784llvm::Type* InTy = Int32Ty;11785llvm::Type* FTy = HalfTy;11786llvm::Type *Tys[2] = {InTy, FTy};11787Ops.push_back(EmitScalarExpr(E->getArg(1)));11788switch (BuiltinID) {11789default: llvm_unreachable("missing builtin ID in switch!");11790case NEON::BI__builtin_neon_vcageh_f16:11791Int = Intrinsic::aarch64_neon_facge; break;11792case NEON::BI__builtin_neon_vcagth_f16:11793Int = 
Intrinsic::aarch64_neon_facgt; break;11794case NEON::BI__builtin_neon_vcaleh_f16:11795Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;11796case NEON::BI__builtin_neon_vcalth_f16:11797Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;11798}11799Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");11800return Builder.CreateTrunc(Ops[0], Int16Ty);11801}11802case NEON::BI__builtin_neon_vcvth_n_s16_f16:11803case NEON::BI__builtin_neon_vcvth_n_u16_f16: {11804unsigned Int;11805llvm::Type* InTy = Int32Ty;11806llvm::Type* FTy = HalfTy;11807llvm::Type *Tys[2] = {InTy, FTy};11808Ops.push_back(EmitScalarExpr(E->getArg(1)));11809switch (BuiltinID) {11810default: llvm_unreachable("missing builtin ID in switch!");11811case NEON::BI__builtin_neon_vcvth_n_s16_f16:11812Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;11813case NEON::BI__builtin_neon_vcvth_n_u16_f16:11814Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;11815}11816Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");11817return Builder.CreateTrunc(Ops[0], Int16Ty);11818}11819case NEON::BI__builtin_neon_vcvth_n_f16_s16:11820case NEON::BI__builtin_neon_vcvth_n_f16_u16: {11821unsigned Int;11822llvm::Type* FTy = HalfTy;11823llvm::Type* InTy = Int32Ty;11824llvm::Type *Tys[2] = {FTy, InTy};11825Ops.push_back(EmitScalarExpr(E->getArg(1)));11826switch (BuiltinID) {11827default: llvm_unreachable("missing builtin ID in switch!");11828case NEON::BI__builtin_neon_vcvth_n_f16_s16:11829Int = Intrinsic::aarch64_neon_vcvtfxs2fp;11830Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");11831break;11832case NEON::BI__builtin_neon_vcvth_n_f16_u16:11833Int = Intrinsic::aarch64_neon_vcvtfxu2fp;11834Ops[0] = Builder.CreateZExt(Ops[0], InTy);11835break;11836}11837return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");11838}11839case NEON::BI__builtin_neon_vpaddd_s64: {11840auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);11841Value *Vec = 
EmitScalarExpr(E->getArg(0));11842// The vector is v2f64, so make sure it's bitcast to that.11843Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");11844llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);11845llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);11846Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");11847Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");11848// Pairwise addition of a v2f64 into a scalar f64.11849return Builder.CreateAdd(Op0, Op1, "vpaddd");11850}11851case NEON::BI__builtin_neon_vpaddd_f64: {11852auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);11853Value *Vec = EmitScalarExpr(E->getArg(0));11854// The vector is v2f64, so make sure it's bitcast to that.11855Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");11856llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);11857llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);11858Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");11859Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");11860// Pairwise addition of a v2f64 into a scalar f64.11861return Builder.CreateFAdd(Op0, Op1, "vpaddd");11862}11863case NEON::BI__builtin_neon_vpadds_f32: {11864auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);11865Value *Vec = EmitScalarExpr(E->getArg(0));11866// The vector is v2f32, so make sure it's bitcast to that.11867Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");11868llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);11869llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);11870Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");11871Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");11872// Pairwise addition of a v2f32 into a scalar f32.11873return Builder.CreateFAdd(Op0, Op1, "vpaddd");11874}11875case NEON::BI__builtin_neon_vceqzd_s64:11876case NEON::BI__builtin_neon_vceqzd_f64:11877case NEON::BI__builtin_neon_vceqzs_f32:11878case NEON::BI__builtin_neon_vceqzh_f16:11879Ops.push_back(EmitScalarExpr(E->getArg(0)));11880return 
EmitAArch64CompareBuiltinExpr(11881Ops[0], ConvertType(E->getCallReturnType(getContext())),11882ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");11883case NEON::BI__builtin_neon_vcgezd_s64:11884case NEON::BI__builtin_neon_vcgezd_f64:11885case NEON::BI__builtin_neon_vcgezs_f32:11886case NEON::BI__builtin_neon_vcgezh_f16:11887Ops.push_back(EmitScalarExpr(E->getArg(0)));11888return EmitAArch64CompareBuiltinExpr(11889Ops[0], ConvertType(E->getCallReturnType(getContext())),11890ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");11891case NEON::BI__builtin_neon_vclezd_s64:11892case NEON::BI__builtin_neon_vclezd_f64:11893case NEON::BI__builtin_neon_vclezs_f32:11894case NEON::BI__builtin_neon_vclezh_f16:11895Ops.push_back(EmitScalarExpr(E->getArg(0)));11896return EmitAArch64CompareBuiltinExpr(11897Ops[0], ConvertType(E->getCallReturnType(getContext())),11898ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");11899case NEON::BI__builtin_neon_vcgtzd_s64:11900case NEON::BI__builtin_neon_vcgtzd_f64:11901case NEON::BI__builtin_neon_vcgtzs_f32:11902case NEON::BI__builtin_neon_vcgtzh_f16:11903Ops.push_back(EmitScalarExpr(E->getArg(0)));11904return EmitAArch64CompareBuiltinExpr(11905Ops[0], ConvertType(E->getCallReturnType(getContext())),11906ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");11907case NEON::BI__builtin_neon_vcltzd_s64:11908case NEON::BI__builtin_neon_vcltzd_f64:11909case NEON::BI__builtin_neon_vcltzs_f32:11910case NEON::BI__builtin_neon_vcltzh_f16:11911Ops.push_back(EmitScalarExpr(E->getArg(0)));11912return EmitAArch64CompareBuiltinExpr(11913Ops[0], ConvertType(E->getCallReturnType(getContext())),11914ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");1191511916case NEON::BI__builtin_neon_vceqzd_u64: {11917Ops.push_back(EmitScalarExpr(E->getArg(0)));11918Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);11919Ops[0] =11920Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));11921return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");11922}11923case 
NEON::BI__builtin_neon_vceqd_f64:11924case NEON::BI__builtin_neon_vcled_f64:11925case NEON::BI__builtin_neon_vcltd_f64:11926case NEON::BI__builtin_neon_vcged_f64:11927case NEON::BI__builtin_neon_vcgtd_f64: {11928llvm::CmpInst::Predicate P;11929switch (BuiltinID) {11930default: llvm_unreachable("missing builtin ID in switch!");11931case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;11932case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;11933case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;11934case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;11935case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;11936}11937Ops.push_back(EmitScalarExpr(E->getArg(1)));11938Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);11939Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);11940if (P == llvm::FCmpInst::FCMP_OEQ)11941Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);11942else11943Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);11944return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");11945}11946case NEON::BI__builtin_neon_vceqs_f32:11947case NEON::BI__builtin_neon_vcles_f32:11948case NEON::BI__builtin_neon_vclts_f32:11949case NEON::BI__builtin_neon_vcges_f32:11950case NEON::BI__builtin_neon_vcgts_f32: {11951llvm::CmpInst::Predicate P;11952switch (BuiltinID) {11953default: llvm_unreachable("missing builtin ID in switch!");11954case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;11955case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;11956case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;11957case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;11958case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;11959}11960Ops.push_back(EmitScalarExpr(E->getArg(1)));11961Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);11962Ops[1] = Builder.CreateBitCast(Ops[1], 
FloatTy);11963if (P == llvm::FCmpInst::FCMP_OEQ)11964Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);11965else11966Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);11967return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");11968}11969case NEON::BI__builtin_neon_vceqh_f16:11970case NEON::BI__builtin_neon_vcleh_f16:11971case NEON::BI__builtin_neon_vclth_f16:11972case NEON::BI__builtin_neon_vcgeh_f16:11973case NEON::BI__builtin_neon_vcgth_f16: {11974llvm::CmpInst::Predicate P;11975switch (BuiltinID) {11976default: llvm_unreachable("missing builtin ID in switch!");11977case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;11978case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;11979case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;11980case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;11981case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;11982}11983Ops.push_back(EmitScalarExpr(E->getArg(1)));11984Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);11985Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);11986if (P == llvm::FCmpInst::FCMP_OEQ)11987Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);11988else11989Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]);11990return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");11991}11992case NEON::BI__builtin_neon_vceqd_s64:11993case NEON::BI__builtin_neon_vceqd_u64:11994case NEON::BI__builtin_neon_vcgtd_s64:11995case NEON::BI__builtin_neon_vcgtd_u64:11996case NEON::BI__builtin_neon_vcltd_s64:11997case NEON::BI__builtin_neon_vcltd_u64:11998case NEON::BI__builtin_neon_vcged_u64:11999case NEON::BI__builtin_neon_vcged_s64:12000case NEON::BI__builtin_neon_vcled_u64:12001case NEON::BI__builtin_neon_vcled_s64: {12002llvm::CmpInst::Predicate P;12003switch (BuiltinID) {12004default: llvm_unreachable("missing builtin ID in switch!");12005case NEON::BI__builtin_neon_vceqd_s64:12006case NEON::BI__builtin_neon_vceqd_u64:P = 
llvm::ICmpInst::ICMP_EQ;break;12007case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;12008case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;12009case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;12010case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;12011case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;12012case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;12013case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;12014case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;12015}12016Ops.push_back(EmitScalarExpr(E->getArg(1)));12017Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);12018Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);12019Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);12020return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");12021}12022case NEON::BI__builtin_neon_vtstd_s64:12023case NEON::BI__builtin_neon_vtstd_u64: {12024Ops.push_back(EmitScalarExpr(E->getArg(1)));12025Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);12026Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);12027Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);12028Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],12029llvm::Constant::getNullValue(Int64Ty));12030return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");12031}12032case NEON::BI__builtin_neon_vset_lane_i8:12033case NEON::BI__builtin_neon_vset_lane_i16:12034case NEON::BI__builtin_neon_vset_lane_i32:12035case NEON::BI__builtin_neon_vset_lane_i64:12036case NEON::BI__builtin_neon_vset_lane_bf16:12037case NEON::BI__builtin_neon_vset_lane_f32:12038case NEON::BI__builtin_neon_vsetq_lane_i8:12039case NEON::BI__builtin_neon_vsetq_lane_i16:12040case NEON::BI__builtin_neon_vsetq_lane_i32:12041case NEON::BI__builtin_neon_vsetq_lane_i64:12042case NEON::BI__builtin_neon_vsetq_lane_bf16:12043case 
NEON::BI__builtin_neon_vsetq_lane_f32:12044Ops.push_back(EmitScalarExpr(E->getArg(2)));12045return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");12046case NEON::BI__builtin_neon_vset_lane_f64:12047// The vector type needs a cast for the v1f64 variant.12048Ops[1] =12049Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));12050Ops.push_back(EmitScalarExpr(E->getArg(2)));12051return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");12052case NEON::BI__builtin_neon_vsetq_lane_f64:12053// The vector type needs a cast for the v2f64 variant.12054Ops[1] =12055Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));12056Ops.push_back(EmitScalarExpr(E->getArg(2)));12057return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");1205812059case NEON::BI__builtin_neon_vget_lane_i8:12060case NEON::BI__builtin_neon_vdupb_lane_i8:12061Ops[0] =12062Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));12063return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12064"vget_lane");12065case NEON::BI__builtin_neon_vgetq_lane_i8:12066case NEON::BI__builtin_neon_vdupb_laneq_i8:12067Ops[0] =12068Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));12069return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12070"vgetq_lane");12071case NEON::BI__builtin_neon_vget_lane_i16:12072case NEON::BI__builtin_neon_vduph_lane_i16:12073Ops[0] =12074Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));12075return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12076"vget_lane");12077case NEON::BI__builtin_neon_vgetq_lane_i16:12078case NEON::BI__builtin_neon_vduph_laneq_i16:12079Ops[0] =12080Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));12081return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12082"vgetq_lane");12083case NEON::BI__builtin_neon_vget_lane_i32:12084case 
NEON::BI__builtin_neon_vdups_lane_i32:12085Ops[0] =12086Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));12087return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12088"vget_lane");12089case NEON::BI__builtin_neon_vdups_lane_f32:12090Ops[0] =12091Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));12092return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12093"vdups_lane");12094case NEON::BI__builtin_neon_vgetq_lane_i32:12095case NEON::BI__builtin_neon_vdups_laneq_i32:12096Ops[0] =12097Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));12098return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12099"vgetq_lane");12100case NEON::BI__builtin_neon_vget_lane_i64:12101case NEON::BI__builtin_neon_vdupd_lane_i64:12102Ops[0] =12103Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));12104return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12105"vget_lane");12106case NEON::BI__builtin_neon_vdupd_lane_f64:12107Ops[0] =12108Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));12109return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12110"vdupd_lane");12111case NEON::BI__builtin_neon_vgetq_lane_i64:12112case NEON::BI__builtin_neon_vdupd_laneq_i64:12113Ops[0] =12114Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));12115return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12116"vgetq_lane");12117case NEON::BI__builtin_neon_vget_lane_f32:12118Ops[0] =12119Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));12120return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12121"vget_lane");12122case NEON::BI__builtin_neon_vget_lane_f64:12123Ops[0] =12124Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));12125return Builder.CreateExtractElement(Ops[0], 
EmitScalarExpr(E->getArg(1)),12126"vget_lane");12127case NEON::BI__builtin_neon_vgetq_lane_f32:12128case NEON::BI__builtin_neon_vdups_laneq_f32:12129Ops[0] =12130Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));12131return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12132"vgetq_lane");12133case NEON::BI__builtin_neon_vgetq_lane_f64:12134case NEON::BI__builtin_neon_vdupd_laneq_f64:12135Ops[0] =12136Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));12137return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12138"vgetq_lane");12139case NEON::BI__builtin_neon_vaddh_f16:12140Ops.push_back(EmitScalarExpr(E->getArg(1)));12141return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");12142case NEON::BI__builtin_neon_vsubh_f16:12143Ops.push_back(EmitScalarExpr(E->getArg(1)));12144return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");12145case NEON::BI__builtin_neon_vmulh_f16:12146Ops.push_back(EmitScalarExpr(E->getArg(1)));12147return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");12148case NEON::BI__builtin_neon_vdivh_f16:12149Ops.push_back(EmitScalarExpr(E->getArg(1)));12150return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");12151case NEON::BI__builtin_neon_vfmah_f16:12152// NEON intrinsic puts accumulator first, unlike the LLVM fma.12153return emitCallMaybeConstrainedFPBuiltin(12154*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,12155{EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});12156case NEON::BI__builtin_neon_vfmsh_f16: {12157Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh");1215812159// NEON intrinsic puts accumulator first, unlike the LLVM fma.12160return emitCallMaybeConstrainedFPBuiltin(12161*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy,12162{Neg, EmitScalarExpr(E->getArg(2)), Ops[0]});12163}12164case NEON::BI__builtin_neon_vaddd_s64:12165case NEON::BI__builtin_neon_vaddd_u64:12166return 
Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");12167case NEON::BI__builtin_neon_vsubd_s64:12168case NEON::BI__builtin_neon_vsubd_u64:12169return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");12170case NEON::BI__builtin_neon_vqdmlalh_s16:12171case NEON::BI__builtin_neon_vqdmlslh_s16: {12172SmallVector<Value *, 2> ProductOps;12173ProductOps.push_back(vectorWrapScalar16(Ops[1]));12174ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));12175auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);12176Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),12177ProductOps, "vqdmlXl");12178Constant *CI = ConstantInt::get(SizeTy, 0);12179Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");1218012181unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s1612182? Intrinsic::aarch64_neon_sqadd12183: Intrinsic::aarch64_neon_sqsub;12184return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");12185}12186case NEON::BI__builtin_neon_vqshlud_n_s64: {12187Ops.push_back(EmitScalarExpr(E->getArg(1)));12188Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);12189return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),12190Ops, "vqshlu_n");12191}12192case NEON::BI__builtin_neon_vqshld_n_u64:12193case NEON::BI__builtin_neon_vqshld_n_s64: {12194unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u6412195? Intrinsic::aarch64_neon_uqshl12196: Intrinsic::aarch64_neon_sqshl;12197Ops.push_back(EmitScalarExpr(E->getArg(1)));12198Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);12199return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");12200}12201case NEON::BI__builtin_neon_vrshrd_n_u64:12202case NEON::BI__builtin_neon_vrshrd_n_s64: {12203unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u6412204? 
Intrinsic::aarch64_neon_urshl12205: Intrinsic::aarch64_neon_srshl;12206Ops.push_back(EmitScalarExpr(E->getArg(1)));12207int SV = cast<ConstantInt>(Ops[1])->getSExtValue();12208Ops[1] = ConstantInt::get(Int64Ty, -SV);12209return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");12210}12211case NEON::BI__builtin_neon_vrsrad_n_u64:12212case NEON::BI__builtin_neon_vrsrad_n_s64: {12213unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u6412214? Intrinsic::aarch64_neon_urshl12215: Intrinsic::aarch64_neon_srshl;12216Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);12217Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));12218Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),12219{Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});12220return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));12221}12222case NEON::BI__builtin_neon_vshld_n_s64:12223case NEON::BI__builtin_neon_vshld_n_u64: {12224llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));12225return Builder.CreateShl(12226Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");12227}12228case NEON::BI__builtin_neon_vshrd_n_s64: {12229llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));12230return Builder.CreateAShr(12231Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),12232Amt->getZExtValue())),12233"shrd_n");12234}12235case NEON::BI__builtin_neon_vshrd_n_u64: {12236llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));12237uint64_t ShiftAmt = Amt->getZExtValue();12238// Right-shifting an unsigned value by its size yields 0.12239if (ShiftAmt == 64)12240return ConstantInt::get(Int64Ty, 0);12241return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),12242"shrd_n");12243}12244case NEON::BI__builtin_neon_vsrad_n_s64: {12245llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));12246Ops[1] = Builder.CreateAShr(12247Ops[1], ConstantInt::get(Int64Ty, 
std::min(static_cast<uint64_t>(63),12248Amt->getZExtValue())),12249"shrd_n");12250return Builder.CreateAdd(Ops[0], Ops[1]);12251}12252case NEON::BI__builtin_neon_vsrad_n_u64: {12253llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));12254uint64_t ShiftAmt = Amt->getZExtValue();12255// Right-shifting an unsigned value by its size yields 0.12256// As Op + 0 = Op, return Ops[0] directly.12257if (ShiftAmt == 64)12258return Ops[0];12259Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),12260"shrd_n");12261return Builder.CreateAdd(Ops[0], Ops[1]);12262}12263case NEON::BI__builtin_neon_vqdmlalh_lane_s16:12264case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:12265case NEON::BI__builtin_neon_vqdmlslh_lane_s16:12266case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {12267Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),12268"lane");12269SmallVector<Value *, 2> ProductOps;12270ProductOps.push_back(vectorWrapScalar16(Ops[1]));12271ProductOps.push_back(vectorWrapScalar16(Ops[2]));12272auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);12273Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),12274ProductOps, "vqdmlXl");12275Constant *CI = ConstantInt::get(SizeTy, 0);12276Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");12277Ops.pop_back();1227812279unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||12280BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)12281? 
Intrinsic::aarch64_neon_sqadd12282: Intrinsic::aarch64_neon_sqsub;12283return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");12284}12285case NEON::BI__builtin_neon_vqdmlals_s32:12286case NEON::BI__builtin_neon_vqdmlsls_s32: {12287SmallVector<Value *, 2> ProductOps;12288ProductOps.push_back(Ops[1]);12289ProductOps.push_back(EmitScalarExpr(E->getArg(2)));12290Ops[1] =12291EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),12292ProductOps, "vqdmlXl");1229312294unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s3212295? Intrinsic::aarch64_neon_sqadd12296: Intrinsic::aarch64_neon_sqsub;12297return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");12298}12299case NEON::BI__builtin_neon_vqdmlals_lane_s32:12300case NEON::BI__builtin_neon_vqdmlals_laneq_s32:12301case NEON::BI__builtin_neon_vqdmlsls_lane_s32:12302case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {12303Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),12304"lane");12305SmallVector<Value *, 2> ProductOps;12306ProductOps.push_back(Ops[1]);12307ProductOps.push_back(Ops[2]);12308Ops[1] =12309EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),12310ProductOps, "vqdmlXl");12311Ops.pop_back();1231212313unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||12314BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)12315? 
Intrinsic::aarch64_neon_sqadd12316: Intrinsic::aarch64_neon_sqsub;12317return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");12318}12319case NEON::BI__builtin_neon_vget_lane_bf16:12320case NEON::BI__builtin_neon_vduph_lane_bf16:12321case NEON::BI__builtin_neon_vduph_lane_f16: {12322return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12323"vget_lane");12324}12325case NEON::BI__builtin_neon_vgetq_lane_bf16:12326case NEON::BI__builtin_neon_vduph_laneq_bf16:12327case NEON::BI__builtin_neon_vduph_laneq_f16: {12328return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),12329"vgetq_lane");12330}1233112332case clang::AArch64::BI_InterlockedAdd:12333case clang::AArch64::BI_InterlockedAdd64: {12334Address DestAddr = CheckAtomicAlignment(*this, E);12335Value *Val = EmitScalarExpr(E->getArg(1));12336AtomicRMWInst *RMWI =12337Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val,12338llvm::AtomicOrdering::SequentiallyConsistent);12339return Builder.CreateAdd(RMWI, Val);12340}12341}1234212343llvm::FixedVectorType *VTy = GetNeonType(this, Type);12344llvm::Type *Ty = VTy;12345if (!Ty)12346return nullptr;1234712348// Not all intrinsics handled by the common case work for AArch64 yet, so only12349// defer to common code if it's been added to our special map.12350Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,12351AArch64SIMDIntrinsicsProvenSorted);1235212353if (Builtin)12354return EmitCommonNeonBuiltinExpr(12355Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,12356Builtin->NameHint, Builtin->TypeModifier, E, Ops,12357/*never use addresses*/ Address::invalid(), Address::invalid(), Arch);1235812359if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))12360return V;1236112362unsigned Int;12363switch (BuiltinID) {12364default: return nullptr;12365case NEON::BI__builtin_neon_vbsl_v:12366case NEON::BI__builtin_neon_vbslq_v: {12367llvm::Type *BitTy = 
llvm::VectorType::getInteger(VTy);12368Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");12369Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");12370Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");1237112372Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");12373Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");12374Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");12375return Builder.CreateBitCast(Ops[0], Ty);12376}12377case NEON::BI__builtin_neon_vfma_lane_v:12378case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types12379// The ARM builtins (and instructions) have the addend as the first12380// operand, but the 'fma' intrinsics have it last. Swap it around here.12381Value *Addend = Ops[0];12382Value *Multiplicand = Ops[1];12383Value *LaneSource = Ops[2];12384Ops[0] = Multiplicand;12385Ops[1] = LaneSource;12386Ops[2] = Addend;1238712388// Now adjust things to handle the lane access.12389auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v12390? llvm::FixedVectorType::get(VTy->getElementType(),12391VTy->getNumElements() / 2)12392: VTy;12393llvm::Constant *cst = cast<Constant>(Ops[3]);12394Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);12395Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);12396Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");1239712398Ops.pop_back();12399Int = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_fma12400: Intrinsic::fma;12401return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");12402}12403case NEON::BI__builtin_neon_vfma_laneq_v: {12404auto *VTy = cast<llvm::FixedVectorType>(Ty);12405// v1f64 fma should be mapped to Neon scalar f64 fma12406if (VTy && VTy->getElementType() == DoubleTy) {12407Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);12408Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);12409llvm::FixedVectorType *VTy =12410GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true));12411Ops[2] = Builder.CreateBitCast(Ops[2], VTy);12412Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");12413Value *Result;12414Result = emitCallMaybeConstrainedFPBuiltin(12415*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma,12416DoubleTy, {Ops[1], Ops[2], Ops[0]});12417return Builder.CreateBitCast(Result, Ty);12418}12419Ops[0] = Builder.CreateBitCast(Ops[0], Ty);12420Ops[1] = Builder.CreateBitCast(Ops[1], Ty);1242112422auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),12423VTy->getNumElements() * 2);12424Ops[2] = Builder.CreateBitCast(Ops[2], STy);12425Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),12426cast<ConstantInt>(Ops[3]));12427Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");1242812429return emitCallMaybeConstrainedFPBuiltin(12430*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,12431{Ops[2], Ops[1], Ops[0]});12432}12433case NEON::BI__builtin_neon_vfmaq_laneq_v: {12434Ops[0] = Builder.CreateBitCast(Ops[0], Ty);12435Ops[1] = Builder.CreateBitCast(Ops[1], Ty);1243612437Ops[2] = Builder.CreateBitCast(Ops[2], Ty);12438Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));12439return emitCallMaybeConstrainedFPBuiltin(12440*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,12441{Ops[2], Ops[1], Ops[0]});12442}12443case NEON::BI__builtin_neon_vfmah_lane_f16:12444case 
NEON::BI__builtin_neon_vfmas_lane_f32:12445case NEON::BI__builtin_neon_vfmah_laneq_f16:12446case NEON::BI__builtin_neon_vfmas_laneq_f32:12447case NEON::BI__builtin_neon_vfmad_lane_f64:12448case NEON::BI__builtin_neon_vfmad_laneq_f64: {12449Ops.push_back(EmitScalarExpr(E->getArg(3)));12450llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));12451Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");12452return emitCallMaybeConstrainedFPBuiltin(12453*this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty,12454{Ops[1], Ops[2], Ops[0]});12455}12456case NEON::BI__builtin_neon_vmull_v:12457// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.12458Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;12459if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;12460return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");12461case NEON::BI__builtin_neon_vmax_v:12462case NEON::BI__builtin_neon_vmaxq_v:12463// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.12464Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;12465if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;12466return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");12467case NEON::BI__builtin_neon_vmaxh_f16: {12468Ops.push_back(EmitScalarExpr(E->getArg(1)));12469Int = Intrinsic::aarch64_neon_fmax;12470return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");12471}12472case NEON::BI__builtin_neon_vmin_v:12473case NEON::BI__builtin_neon_vminq_v:12474// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.12475Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;12476if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;12477return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");12478case NEON::BI__builtin_neon_vminh_f16: {12479Ops.push_back(EmitScalarExpr(E->getArg(1)));12480Int = Intrinsic::aarch64_neon_fmin;12481return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");12482}12483case NEON::BI__builtin_neon_vabd_v:12484case NEON::BI__builtin_neon_vabdq_v:12485// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.12486Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;12487if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;12488return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");12489case NEON::BI__builtin_neon_vpadal_v:12490case NEON::BI__builtin_neon_vpadalq_v: {12491unsigned ArgElts = VTy->getNumElements();12492llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());12493unsigned BitWidth = EltTy->getBitWidth();12494auto *ArgTy = llvm::FixedVectorType::get(12495llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);12496llvm::Type* Tys[2] = { VTy, ArgTy };12497Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;12498SmallVector<llvm::Value*, 1> TmpOps;12499TmpOps.push_back(Ops[1]);12500Function *F = CGM.getIntrinsic(Int, Tys);12501llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");12502llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());12503return Builder.CreateAdd(tmp, addend);12504}12505case NEON::BI__builtin_neon_vpmin_v:12506case NEON::BI__builtin_neon_vpminq_v:12507// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.12508Int = usgn ? 
Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;12509if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;12510return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");12511case NEON::BI__builtin_neon_vpmax_v:12512case NEON::BI__builtin_neon_vpmaxq_v:12513// FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.12514Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;12515if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;12516return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");12517case NEON::BI__builtin_neon_vminnm_v:12518case NEON::BI__builtin_neon_vminnmq_v:12519Int = Intrinsic::aarch64_neon_fminnm;12520return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");12521case NEON::BI__builtin_neon_vminnmh_f16:12522Ops.push_back(EmitScalarExpr(E->getArg(1)));12523Int = Intrinsic::aarch64_neon_fminnm;12524return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");12525case NEON::BI__builtin_neon_vmaxnm_v:12526case NEON::BI__builtin_neon_vmaxnmq_v:12527Int = Intrinsic::aarch64_neon_fmaxnm;12528return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");12529case NEON::BI__builtin_neon_vmaxnmh_f16:12530Ops.push_back(EmitScalarExpr(E->getArg(1)));12531Int = Intrinsic::aarch64_neon_fmaxnm;12532return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");12533case NEON::BI__builtin_neon_vrecpss_f32: {12534Ops.push_back(EmitScalarExpr(E->getArg(1)));12535return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),12536Ops, "vrecps");12537}12538case NEON::BI__builtin_neon_vrecpsd_f64:12539Ops.push_back(EmitScalarExpr(E->getArg(1)));12540return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),12541Ops, "vrecps");12542case NEON::BI__builtin_neon_vrecpsh_f16:12543Ops.push_back(EmitScalarExpr(E->getArg(1)));12544return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),12545Ops, "vrecps");12546case 
NEON::BI__builtin_neon_vqshrun_n_v:12547Int = Intrinsic::aarch64_neon_sqshrun;12548return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");12549case NEON::BI__builtin_neon_vqrshrun_n_v:12550Int = Intrinsic::aarch64_neon_sqrshrun;12551return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");12552case NEON::BI__builtin_neon_vqshrn_n_v:12553Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;12554return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");12555case NEON::BI__builtin_neon_vrshrn_n_v:12556Int = Intrinsic::aarch64_neon_rshrn;12557return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");12558case NEON::BI__builtin_neon_vqrshrn_n_v:12559Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;12560return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");12561case NEON::BI__builtin_neon_vrndah_f16: {12562Ops.push_back(EmitScalarExpr(E->getArg(0)));12563Int = Builder.getIsFPConstrained()12564? Intrinsic::experimental_constrained_round12565: Intrinsic::round;12566return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");12567}12568case NEON::BI__builtin_neon_vrnda_v:12569case NEON::BI__builtin_neon_vrndaq_v: {12570Int = Builder.getIsFPConstrained()12571? Intrinsic::experimental_constrained_round12572: Intrinsic::round;12573return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");12574}12575case NEON::BI__builtin_neon_vrndih_f16: {12576Ops.push_back(EmitScalarExpr(E->getArg(0)));12577Int = Builder.getIsFPConstrained()12578? Intrinsic::experimental_constrained_nearbyint12579: Intrinsic::nearbyint;12580return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");12581}12582case NEON::BI__builtin_neon_vrndmh_f16: {12583Ops.push_back(EmitScalarExpr(E->getArg(0)));12584Int = Builder.getIsFPConstrained()12585? 
Intrinsic::experimental_constrained_floor12586: Intrinsic::floor;12587return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");12588}12589case NEON::BI__builtin_neon_vrndm_v:12590case NEON::BI__builtin_neon_vrndmq_v: {12591Int = Builder.getIsFPConstrained()12592? Intrinsic::experimental_constrained_floor12593: Intrinsic::floor;12594return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");12595}12596case NEON::BI__builtin_neon_vrndnh_f16: {12597Ops.push_back(EmitScalarExpr(E->getArg(0)));12598Int = Builder.getIsFPConstrained()12599? Intrinsic::experimental_constrained_roundeven12600: Intrinsic::roundeven;12601return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");12602}12603case NEON::BI__builtin_neon_vrndn_v:12604case NEON::BI__builtin_neon_vrndnq_v: {12605Int = Builder.getIsFPConstrained()12606? Intrinsic::experimental_constrained_roundeven12607: Intrinsic::roundeven;12608return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");12609}12610case NEON::BI__builtin_neon_vrndns_f32: {12611Ops.push_back(EmitScalarExpr(E->getArg(0)));12612Int = Builder.getIsFPConstrained()12613? Intrinsic::experimental_constrained_roundeven12614: Intrinsic::roundeven;12615return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");12616}12617case NEON::BI__builtin_neon_vrndph_f16: {12618Ops.push_back(EmitScalarExpr(E->getArg(0)));12619Int = Builder.getIsFPConstrained()12620? Intrinsic::experimental_constrained_ceil12621: Intrinsic::ceil;12622return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");12623}12624case NEON::BI__builtin_neon_vrndp_v:12625case NEON::BI__builtin_neon_vrndpq_v: {12626Int = Builder.getIsFPConstrained()12627? Intrinsic::experimental_constrained_ceil12628: Intrinsic::ceil;12629return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");12630}12631case NEON::BI__builtin_neon_vrndxh_f16: {12632Ops.push_back(EmitScalarExpr(E->getArg(0)));12633Int = Builder.getIsFPConstrained()12634? 
Intrinsic::experimental_constrained_rint12635: Intrinsic::rint;12636return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");12637}12638case NEON::BI__builtin_neon_vrndx_v:12639case NEON::BI__builtin_neon_vrndxq_v: {12640Int = Builder.getIsFPConstrained()12641? Intrinsic::experimental_constrained_rint12642: Intrinsic::rint;12643return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");12644}12645case NEON::BI__builtin_neon_vrndh_f16: {12646Ops.push_back(EmitScalarExpr(E->getArg(0)));12647Int = Builder.getIsFPConstrained()12648? Intrinsic::experimental_constrained_trunc12649: Intrinsic::trunc;12650return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");12651}12652case NEON::BI__builtin_neon_vrnd32x_f32:12653case NEON::BI__builtin_neon_vrnd32xq_f32:12654case NEON::BI__builtin_neon_vrnd32x_f64:12655case NEON::BI__builtin_neon_vrnd32xq_f64: {12656Ops.push_back(EmitScalarExpr(E->getArg(0)));12657Int = Intrinsic::aarch64_neon_frint32x;12658return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");12659}12660case NEON::BI__builtin_neon_vrnd32z_f32:12661case NEON::BI__builtin_neon_vrnd32zq_f32:12662case NEON::BI__builtin_neon_vrnd32z_f64:12663case NEON::BI__builtin_neon_vrnd32zq_f64: {12664Ops.push_back(EmitScalarExpr(E->getArg(0)));12665Int = Intrinsic::aarch64_neon_frint32z;12666return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");12667}12668case NEON::BI__builtin_neon_vrnd64x_f32:12669case NEON::BI__builtin_neon_vrnd64xq_f32:12670case NEON::BI__builtin_neon_vrnd64x_f64:12671case NEON::BI__builtin_neon_vrnd64xq_f64: {12672Ops.push_back(EmitScalarExpr(E->getArg(0)));12673Int = Intrinsic::aarch64_neon_frint64x;12674return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");12675}12676case NEON::BI__builtin_neon_vrnd64z_f32:12677case NEON::BI__builtin_neon_vrnd64zq_f32:12678case NEON::BI__builtin_neon_vrnd64z_f64:12679case NEON::BI__builtin_neon_vrnd64zq_f64: {12680Ops.push_back(EmitScalarExpr(E->getArg(0)));12681Int = 
Intrinsic::aarch64_neon_frint64z;12682return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");12683}12684case NEON::BI__builtin_neon_vrnd_v:12685case NEON::BI__builtin_neon_vrndq_v: {12686Int = Builder.getIsFPConstrained()12687? Intrinsic::experimental_constrained_trunc12688: Intrinsic::trunc;12689return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");12690}12691case NEON::BI__builtin_neon_vcvt_f64_v:12692case NEON::BI__builtin_neon_vcvtq_f64_v:12693Ops[0] = Builder.CreateBitCast(Ops[0], Ty);12694Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));12695return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")12696: Builder.CreateSIToFP(Ops[0], Ty, "vcvt");12697case NEON::BI__builtin_neon_vcvt_f64_f32: {12698assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&12699"unexpected vcvt_f64_f32 builtin");12700NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);12701Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));1270212703return Builder.CreateFPExt(Ops[0], Ty, "vcvt");12704}12705case NEON::BI__builtin_neon_vcvt_f32_f64: {12706assert(Type.getEltType() == NeonTypeFlags::Float32 &&12707"unexpected vcvt_f32_f64 builtin");12708NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);12709Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));1271012711return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");12712}12713case NEON::BI__builtin_neon_vcvt_s32_v:12714case NEON::BI__builtin_neon_vcvt_u32_v:12715case NEON::BI__builtin_neon_vcvt_s64_v:12716case NEON::BI__builtin_neon_vcvt_u64_v:12717case NEON::BI__builtin_neon_vcvt_s16_f16:12718case NEON::BI__builtin_neon_vcvt_u16_f16:12719case NEON::BI__builtin_neon_vcvtq_s32_v:12720case NEON::BI__builtin_neon_vcvtq_u32_v:12721case NEON::BI__builtin_neon_vcvtq_s64_v:12722case NEON::BI__builtin_neon_vcvtq_u64_v:12723case NEON::BI__builtin_neon_vcvtq_s16_f16:12724case NEON::BI__builtin_neon_vcvtq_u16_f16: {12725Int =12726usgn ? 
Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs;12727llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)};12728return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz");12729}12730case NEON::BI__builtin_neon_vcvta_s16_f16:12731case NEON::BI__builtin_neon_vcvta_u16_f16:12732case NEON::BI__builtin_neon_vcvta_s32_v:12733case NEON::BI__builtin_neon_vcvtaq_s16_f16:12734case NEON::BI__builtin_neon_vcvtaq_s32_v:12735case NEON::BI__builtin_neon_vcvta_u32_v:12736case NEON::BI__builtin_neon_vcvtaq_u16_f16:12737case NEON::BI__builtin_neon_vcvtaq_u32_v:12738case NEON::BI__builtin_neon_vcvta_s64_v:12739case NEON::BI__builtin_neon_vcvtaq_s64_v:12740case NEON::BI__builtin_neon_vcvta_u64_v:12741case NEON::BI__builtin_neon_vcvtaq_u64_v: {12742Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;12743llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };12744return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");12745}12746case NEON::BI__builtin_neon_vcvtm_s16_f16:12747case NEON::BI__builtin_neon_vcvtm_s32_v:12748case NEON::BI__builtin_neon_vcvtmq_s16_f16:12749case NEON::BI__builtin_neon_vcvtmq_s32_v:12750case NEON::BI__builtin_neon_vcvtm_u16_f16:12751case NEON::BI__builtin_neon_vcvtm_u32_v:12752case NEON::BI__builtin_neon_vcvtmq_u16_f16:12753case NEON::BI__builtin_neon_vcvtmq_u32_v:12754case NEON::BI__builtin_neon_vcvtm_s64_v:12755case NEON::BI__builtin_neon_vcvtmq_s64_v:12756case NEON::BI__builtin_neon_vcvtm_u64_v:12757case NEON::BI__builtin_neon_vcvtmq_u64_v: {12758Int = usgn ? 
Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;12759llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };12760return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");12761}12762case NEON::BI__builtin_neon_vcvtn_s16_f16:12763case NEON::BI__builtin_neon_vcvtn_s32_v:12764case NEON::BI__builtin_neon_vcvtnq_s16_f16:12765case NEON::BI__builtin_neon_vcvtnq_s32_v:12766case NEON::BI__builtin_neon_vcvtn_u16_f16:12767case NEON::BI__builtin_neon_vcvtn_u32_v:12768case NEON::BI__builtin_neon_vcvtnq_u16_f16:12769case NEON::BI__builtin_neon_vcvtnq_u32_v:12770case NEON::BI__builtin_neon_vcvtn_s64_v:12771case NEON::BI__builtin_neon_vcvtnq_s64_v:12772case NEON::BI__builtin_neon_vcvtn_u64_v:12773case NEON::BI__builtin_neon_vcvtnq_u64_v: {12774Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;12775llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };12776return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");12777}12778case NEON::BI__builtin_neon_vcvtp_s16_f16:12779case NEON::BI__builtin_neon_vcvtp_s32_v:12780case NEON::BI__builtin_neon_vcvtpq_s16_f16:12781case NEON::BI__builtin_neon_vcvtpq_s32_v:12782case NEON::BI__builtin_neon_vcvtp_u16_f16:12783case NEON::BI__builtin_neon_vcvtp_u32_v:12784case NEON::BI__builtin_neon_vcvtpq_u16_f16:12785case NEON::BI__builtin_neon_vcvtpq_u32_v:12786case NEON::BI__builtin_neon_vcvtp_s64_v:12787case NEON::BI__builtin_neon_vcvtpq_s64_v:12788case NEON::BI__builtin_neon_vcvtp_u64_v:12789case NEON::BI__builtin_neon_vcvtpq_u64_v: {12790Int = usgn ? 
Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;12791llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };12792return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");12793}12794case NEON::BI__builtin_neon_vmulx_v:12795case NEON::BI__builtin_neon_vmulxq_v: {12796Int = Intrinsic::aarch64_neon_fmulx;12797return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");12798}12799case NEON::BI__builtin_neon_vmulxh_lane_f16:12800case NEON::BI__builtin_neon_vmulxh_laneq_f16: {12801// vmulx_lane should be mapped to Neon scalar mulx after12802// extracting the scalar element12803Ops.push_back(EmitScalarExpr(E->getArg(2)));12804Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");12805Ops.pop_back();12806Int = Intrinsic::aarch64_neon_fmulx;12807return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");12808}12809case NEON::BI__builtin_neon_vmul_lane_v:12810case NEON::BI__builtin_neon_vmul_laneq_v: {12811// v1f64 vmul_lane should be mapped to Neon scalar mul lane12812bool Quad = false;12813if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)12814Quad = true;12815Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);12816llvm::FixedVectorType *VTy =12817GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));12818Ops[1] = Builder.CreateBitCast(Ops[1], VTy);12819Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");12820Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);12821return Builder.CreateBitCast(Result, Ty);12822}12823case NEON::BI__builtin_neon_vnegd_s64:12824return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");12825case NEON::BI__builtin_neon_vnegh_f16:12826return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");12827case NEON::BI__builtin_neon_vpmaxnm_v:12828case NEON::BI__builtin_neon_vpmaxnmq_v: {12829Int = Intrinsic::aarch64_neon_fmaxnmp;12830return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");12831}12832case NEON::BI__builtin_neon_vpminnm_v:12833case 
NEON::BI__builtin_neon_vpminnmq_v: {12834Int = Intrinsic::aarch64_neon_fminnmp;12835return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");12836}12837case NEON::BI__builtin_neon_vsqrth_f16: {12838Ops.push_back(EmitScalarExpr(E->getArg(0)));12839Int = Builder.getIsFPConstrained()12840? Intrinsic::experimental_constrained_sqrt12841: Intrinsic::sqrt;12842return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");12843}12844case NEON::BI__builtin_neon_vsqrt_v:12845case NEON::BI__builtin_neon_vsqrtq_v: {12846Int = Builder.getIsFPConstrained()12847? Intrinsic::experimental_constrained_sqrt12848: Intrinsic::sqrt;12849Ops[0] = Builder.CreateBitCast(Ops[0], Ty);12850return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");12851}12852case NEON::BI__builtin_neon_vrbit_v:12853case NEON::BI__builtin_neon_vrbitq_v: {12854Int = Intrinsic::bitreverse;12855return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");12856}12857case NEON::BI__builtin_neon_vaddv_u8:12858// FIXME: These are handled by the AArch64 scalar code.12859usgn = true;12860[[fallthrough]];12861case NEON::BI__builtin_neon_vaddv_s8: {12862Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;12863Ty = Int32Ty;12864VTy = llvm::FixedVectorType::get(Int8Ty, 8);12865llvm::Type *Tys[2] = { Ty, VTy };12866Ops.push_back(EmitScalarExpr(E->getArg(0)));12867Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");12868return Builder.CreateTrunc(Ops[0], Int8Ty);12869}12870case NEON::BI__builtin_neon_vaddv_u16:12871usgn = true;12872[[fallthrough]];12873case NEON::BI__builtin_neon_vaddv_s16: {12874Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;12875Ty = Int32Ty;12876VTy = llvm::FixedVectorType::get(Int16Ty, 4);12877llvm::Type *Tys[2] = { Ty, VTy };12878Ops.push_back(EmitScalarExpr(E->getArg(0)));12879Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");12880return Builder.CreateTrunc(Ops[0], Int16Ty);12881}12882case NEON::BI__builtin_neon_vaddvq_u8:12883usgn = true;12884[[fallthrough]];12885case NEON::BI__builtin_neon_vaddvq_s8: {12886Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;12887Ty = Int32Ty;12888VTy = llvm::FixedVectorType::get(Int8Ty, 16);12889llvm::Type *Tys[2] = { Ty, VTy };12890Ops.push_back(EmitScalarExpr(E->getArg(0)));12891Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");12892return Builder.CreateTrunc(Ops[0], Int8Ty);12893}12894case NEON::BI__builtin_neon_vaddvq_u16:12895usgn = true;12896[[fallthrough]];12897case NEON::BI__builtin_neon_vaddvq_s16: {12898Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;12899Ty = Int32Ty;12900VTy = llvm::FixedVectorType::get(Int16Ty, 8);12901llvm::Type *Tys[2] = { Ty, VTy };12902Ops.push_back(EmitScalarExpr(E->getArg(0)));12903Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");12904return Builder.CreateTrunc(Ops[0], Int16Ty);12905}12906case NEON::BI__builtin_neon_vmaxv_u8: {12907Int = Intrinsic::aarch64_neon_umaxv;12908Ty = Int32Ty;12909VTy = llvm::FixedVectorType::get(Int8Ty, 8);12910llvm::Type *Tys[2] = { Ty, VTy };12911Ops.push_back(EmitScalarExpr(E->getArg(0)));12912Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12913return Builder.CreateTrunc(Ops[0], Int8Ty);12914}12915case NEON::BI__builtin_neon_vmaxv_u16: {12916Int = Intrinsic::aarch64_neon_umaxv;12917Ty = Int32Ty;12918VTy = llvm::FixedVectorType::get(Int16Ty, 4);12919llvm::Type *Tys[2] = { Ty, VTy };12920Ops.push_back(EmitScalarExpr(E->getArg(0)));12921Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12922return 
Builder.CreateTrunc(Ops[0], Int16Ty);12923}12924case NEON::BI__builtin_neon_vmaxvq_u8: {12925Int = Intrinsic::aarch64_neon_umaxv;12926Ty = Int32Ty;12927VTy = llvm::FixedVectorType::get(Int8Ty, 16);12928llvm::Type *Tys[2] = { Ty, VTy };12929Ops.push_back(EmitScalarExpr(E->getArg(0)));12930Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12931return Builder.CreateTrunc(Ops[0], Int8Ty);12932}12933case NEON::BI__builtin_neon_vmaxvq_u16: {12934Int = Intrinsic::aarch64_neon_umaxv;12935Ty = Int32Ty;12936VTy = llvm::FixedVectorType::get(Int16Ty, 8);12937llvm::Type *Tys[2] = { Ty, VTy };12938Ops.push_back(EmitScalarExpr(E->getArg(0)));12939Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12940return Builder.CreateTrunc(Ops[0], Int16Ty);12941}12942case NEON::BI__builtin_neon_vmaxv_s8: {12943Int = Intrinsic::aarch64_neon_smaxv;12944Ty = Int32Ty;12945VTy = llvm::FixedVectorType::get(Int8Ty, 8);12946llvm::Type *Tys[2] = { Ty, VTy };12947Ops.push_back(EmitScalarExpr(E->getArg(0)));12948Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12949return Builder.CreateTrunc(Ops[0], Int8Ty);12950}12951case NEON::BI__builtin_neon_vmaxv_s16: {12952Int = Intrinsic::aarch64_neon_smaxv;12953Ty = Int32Ty;12954VTy = llvm::FixedVectorType::get(Int16Ty, 4);12955llvm::Type *Tys[2] = { Ty, VTy };12956Ops.push_back(EmitScalarExpr(E->getArg(0)));12957Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12958return Builder.CreateTrunc(Ops[0], Int16Ty);12959}12960case NEON::BI__builtin_neon_vmaxvq_s8: {12961Int = Intrinsic::aarch64_neon_smaxv;12962Ty = Int32Ty;12963VTy = llvm::FixedVectorType::get(Int8Ty, 16);12964llvm::Type *Tys[2] = { Ty, VTy };12965Ops.push_back(EmitScalarExpr(E->getArg(0)));12966Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12967return Builder.CreateTrunc(Ops[0], Int8Ty);12968}12969case NEON::BI__builtin_neon_vmaxvq_s16: {12970Int = Intrinsic::aarch64_neon_smaxv;12971Ty = Int32Ty;12972VTy = 
llvm::FixedVectorType::get(Int16Ty, 8);12973llvm::Type *Tys[2] = { Ty, VTy };12974Ops.push_back(EmitScalarExpr(E->getArg(0)));12975Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12976return Builder.CreateTrunc(Ops[0], Int16Ty);12977}12978case NEON::BI__builtin_neon_vmaxv_f16: {12979Int = Intrinsic::aarch64_neon_fmaxv;12980Ty = HalfTy;12981VTy = llvm::FixedVectorType::get(HalfTy, 4);12982llvm::Type *Tys[2] = { Ty, VTy };12983Ops.push_back(EmitScalarExpr(E->getArg(0)));12984Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12985return Builder.CreateTrunc(Ops[0], HalfTy);12986}12987case NEON::BI__builtin_neon_vmaxvq_f16: {12988Int = Intrinsic::aarch64_neon_fmaxv;12989Ty = HalfTy;12990VTy = llvm::FixedVectorType::get(HalfTy, 8);12991llvm::Type *Tys[2] = { Ty, VTy };12992Ops.push_back(EmitScalarExpr(E->getArg(0)));12993Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");12994return Builder.CreateTrunc(Ops[0], HalfTy);12995}12996case NEON::BI__builtin_neon_vminv_u8: {12997Int = Intrinsic::aarch64_neon_uminv;12998Ty = Int32Ty;12999VTy = llvm::FixedVectorType::get(Int8Ty, 8);13000llvm::Type *Tys[2] = { Ty, VTy };13001Ops.push_back(EmitScalarExpr(E->getArg(0)));13002Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13003return Builder.CreateTrunc(Ops[0], Int8Ty);13004}13005case NEON::BI__builtin_neon_vminv_u16: {13006Int = Intrinsic::aarch64_neon_uminv;13007Ty = Int32Ty;13008VTy = llvm::FixedVectorType::get(Int16Ty, 4);13009llvm::Type *Tys[2] = { Ty, VTy };13010Ops.push_back(EmitScalarExpr(E->getArg(0)));13011Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13012return Builder.CreateTrunc(Ops[0], Int16Ty);13013}13014case NEON::BI__builtin_neon_vminvq_u8: {13015Int = Intrinsic::aarch64_neon_uminv;13016Ty = Int32Ty;13017VTy = llvm::FixedVectorType::get(Int8Ty, 16);13018llvm::Type *Tys[2] = { Ty, VTy };13019Ops.push_back(EmitScalarExpr(E->getArg(0)));13020Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), 
Ops, "vminv");13021return Builder.CreateTrunc(Ops[0], Int8Ty);13022}13023case NEON::BI__builtin_neon_vminvq_u16: {13024Int = Intrinsic::aarch64_neon_uminv;13025Ty = Int32Ty;13026VTy = llvm::FixedVectorType::get(Int16Ty, 8);13027llvm::Type *Tys[2] = { Ty, VTy };13028Ops.push_back(EmitScalarExpr(E->getArg(0)));13029Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13030return Builder.CreateTrunc(Ops[0], Int16Ty);13031}13032case NEON::BI__builtin_neon_vminv_s8: {13033Int = Intrinsic::aarch64_neon_sminv;13034Ty = Int32Ty;13035VTy = llvm::FixedVectorType::get(Int8Ty, 8);13036llvm::Type *Tys[2] = { Ty, VTy };13037Ops.push_back(EmitScalarExpr(E->getArg(0)));13038Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13039return Builder.CreateTrunc(Ops[0], Int8Ty);13040}13041case NEON::BI__builtin_neon_vminv_s16: {13042Int = Intrinsic::aarch64_neon_sminv;13043Ty = Int32Ty;13044VTy = llvm::FixedVectorType::get(Int16Ty, 4);13045llvm::Type *Tys[2] = { Ty, VTy };13046Ops.push_back(EmitScalarExpr(E->getArg(0)));13047Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13048return Builder.CreateTrunc(Ops[0], Int16Ty);13049}13050case NEON::BI__builtin_neon_vminvq_s8: {13051Int = Intrinsic::aarch64_neon_sminv;13052Ty = Int32Ty;13053VTy = llvm::FixedVectorType::get(Int8Ty, 16);13054llvm::Type *Tys[2] = { Ty, VTy };13055Ops.push_back(EmitScalarExpr(E->getArg(0)));13056Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13057return Builder.CreateTrunc(Ops[0], Int8Ty);13058}13059case NEON::BI__builtin_neon_vminvq_s16: {13060Int = Intrinsic::aarch64_neon_sminv;13061Ty = Int32Ty;13062VTy = llvm::FixedVectorType::get(Int16Ty, 8);13063llvm::Type *Tys[2] = { Ty, VTy };13064Ops.push_back(EmitScalarExpr(E->getArg(0)));13065Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13066return Builder.CreateTrunc(Ops[0], Int16Ty);13067}13068case NEON::BI__builtin_neon_vminv_f16: {13069Int = Intrinsic::aarch64_neon_fminv;13070Ty = 
HalfTy;13071VTy = llvm::FixedVectorType::get(HalfTy, 4);13072llvm::Type *Tys[2] = { Ty, VTy };13073Ops.push_back(EmitScalarExpr(E->getArg(0)));13074Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13075return Builder.CreateTrunc(Ops[0], HalfTy);13076}13077case NEON::BI__builtin_neon_vminvq_f16: {13078Int = Intrinsic::aarch64_neon_fminv;13079Ty = HalfTy;13080VTy = llvm::FixedVectorType::get(HalfTy, 8);13081llvm::Type *Tys[2] = { Ty, VTy };13082Ops.push_back(EmitScalarExpr(E->getArg(0)));13083Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");13084return Builder.CreateTrunc(Ops[0], HalfTy);13085}13086case NEON::BI__builtin_neon_vmaxnmv_f16: {13087Int = Intrinsic::aarch64_neon_fmaxnmv;13088Ty = HalfTy;13089VTy = llvm::FixedVectorType::get(HalfTy, 4);13090llvm::Type *Tys[2] = { Ty, VTy };13091Ops.push_back(EmitScalarExpr(E->getArg(0)));13092Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");13093return Builder.CreateTrunc(Ops[0], HalfTy);13094}13095case NEON::BI__builtin_neon_vmaxnmvq_f16: {13096Int = Intrinsic::aarch64_neon_fmaxnmv;13097Ty = HalfTy;13098VTy = llvm::FixedVectorType::get(HalfTy, 8);13099llvm::Type *Tys[2] = { Ty, VTy };13100Ops.push_back(EmitScalarExpr(E->getArg(0)));13101Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");13102return Builder.CreateTrunc(Ops[0], HalfTy);13103}13104case NEON::BI__builtin_neon_vminnmv_f16: {13105Int = Intrinsic::aarch64_neon_fminnmv;13106Ty = HalfTy;13107VTy = llvm::FixedVectorType::get(HalfTy, 4);13108llvm::Type *Tys[2] = { Ty, VTy };13109Ops.push_back(EmitScalarExpr(E->getArg(0)));13110Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");13111return Builder.CreateTrunc(Ops[0], HalfTy);13112}13113case NEON::BI__builtin_neon_vminnmvq_f16: {13114Int = Intrinsic::aarch64_neon_fminnmv;13115Ty = HalfTy;13116VTy = llvm::FixedVectorType::get(HalfTy, 8);13117llvm::Type *Tys[2] = { Ty, VTy };13118Ops.push_back(EmitScalarExpr(E->getArg(0)));13119Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");13120return Builder.CreateTrunc(Ops[0], HalfTy);13121}13122case NEON::BI__builtin_neon_vmul_n_f64: {13123Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);13124Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);13125return Builder.CreateFMul(Ops[0], RHS);13126}13127case NEON::BI__builtin_neon_vaddlv_u8: {13128Int = Intrinsic::aarch64_neon_uaddlv;13129Ty = Int32Ty;13130VTy = llvm::FixedVectorType::get(Int8Ty, 8);13131llvm::Type *Tys[2] = { Ty, VTy };13132Ops.push_back(EmitScalarExpr(E->getArg(0)));13133Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13134return Builder.CreateTrunc(Ops[0], Int16Ty);13135}13136case NEON::BI__builtin_neon_vaddlv_u16: {13137Int = Intrinsic::aarch64_neon_uaddlv;13138Ty = Int32Ty;13139VTy = llvm::FixedVectorType::get(Int16Ty, 4);13140llvm::Type *Tys[2] = { Ty, VTy };13141Ops.push_back(EmitScalarExpr(E->getArg(0)));13142return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13143}13144case NEON::BI__builtin_neon_vaddlvq_u8: {13145Int = Intrinsic::aarch64_neon_uaddlv;13146Ty = Int32Ty;13147VTy = llvm::FixedVectorType::get(Int8Ty, 16);13148llvm::Type *Tys[2] = { Ty, VTy };13149Ops.push_back(EmitScalarExpr(E->getArg(0)));13150Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13151return Builder.CreateTrunc(Ops[0], Int16Ty);13152}13153case NEON::BI__builtin_neon_vaddlvq_u16: {13154Int = Intrinsic::aarch64_neon_uaddlv;13155Ty = Int32Ty;13156VTy = llvm::FixedVectorType::get(Int16Ty, 8);13157llvm::Type *Tys[2] = { Ty, VTy };13158Ops.push_back(EmitScalarExpr(E->getArg(0)));13159return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13160}13161case NEON::BI__builtin_neon_vaddlv_s8: {13162Int = Intrinsic::aarch64_neon_saddlv;13163Ty = Int32Ty;13164VTy = llvm::FixedVectorType::get(Int8Ty, 8);13165llvm::Type *Tys[2] = { Ty, VTy };13166Ops.push_back(EmitScalarExpr(E->getArg(0)));13167Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, 
Tys), Ops, "vaddlv");13168return Builder.CreateTrunc(Ops[0], Int16Ty);13169}13170case NEON::BI__builtin_neon_vaddlv_s16: {13171Int = Intrinsic::aarch64_neon_saddlv;13172Ty = Int32Ty;13173VTy = llvm::FixedVectorType::get(Int16Ty, 4);13174llvm::Type *Tys[2] = { Ty, VTy };13175Ops.push_back(EmitScalarExpr(E->getArg(0)));13176return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13177}13178case NEON::BI__builtin_neon_vaddlvq_s8: {13179Int = Intrinsic::aarch64_neon_saddlv;13180Ty = Int32Ty;13181VTy = llvm::FixedVectorType::get(Int8Ty, 16);13182llvm::Type *Tys[2] = { Ty, VTy };13183Ops.push_back(EmitScalarExpr(E->getArg(0)));13184Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13185return Builder.CreateTrunc(Ops[0], Int16Ty);13186}13187case NEON::BI__builtin_neon_vaddlvq_s16: {13188Int = Intrinsic::aarch64_neon_saddlv;13189Ty = Int32Ty;13190VTy = llvm::FixedVectorType::get(Int16Ty, 8);13191llvm::Type *Tys[2] = { Ty, VTy };13192Ops.push_back(EmitScalarExpr(E->getArg(0)));13193return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");13194}13195case NEON::BI__builtin_neon_vsri_n_v:13196case NEON::BI__builtin_neon_vsriq_n_v: {13197Int = Intrinsic::aarch64_neon_vsri;13198llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);13199return EmitNeonCall(Intrin, Ops, "vsri_n");13200}13201case NEON::BI__builtin_neon_vsli_n_v:13202case NEON::BI__builtin_neon_vsliq_n_v: {13203Int = Intrinsic::aarch64_neon_vsli;13204llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);13205return EmitNeonCall(Intrin, Ops, "vsli_n");13206}13207case NEON::BI__builtin_neon_vsra_n_v:13208case NEON::BI__builtin_neon_vsraq_n_v:13209Ops[0] = Builder.CreateBitCast(Ops[0], Ty);13210Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");13211return Builder.CreateAdd(Ops[0], Ops[1]);13212case NEON::BI__builtin_neon_vrsra_n_v:13213case NEON::BI__builtin_neon_vrsraq_n_v: {13214Int = usgn ? 
Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;13215SmallVector<llvm::Value*,2> TmpOps;13216TmpOps.push_back(Ops[1]);13217TmpOps.push_back(Ops[2]);13218Function* F = CGM.getIntrinsic(Int, Ty);13219llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);13220Ops[0] = Builder.CreateBitCast(Ops[0], VTy);13221return Builder.CreateAdd(Ops[0], tmp);13222}13223case NEON::BI__builtin_neon_vld1_v:13224case NEON::BI__builtin_neon_vld1q_v: {13225return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment());13226}13227case NEON::BI__builtin_neon_vst1_v:13228case NEON::BI__builtin_neon_vst1q_v:13229Ops[1] = Builder.CreateBitCast(Ops[1], VTy);13230return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());13231case NEON::BI__builtin_neon_vld1_lane_v:13232case NEON::BI__builtin_neon_vld1q_lane_v: {13233Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13234Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],13235PtrOp0.getAlignment());13236return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");13237}13238case NEON::BI__builtin_neon_vldap1_lane_s64:13239case NEON::BI__builtin_neon_vldap1q_lane_s64: {13240Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13241llvm::LoadInst *LI = Builder.CreateAlignedLoad(13242VTy->getElementType(), Ops[0], PtrOp0.getAlignment());13243LI->setAtomic(llvm::AtomicOrdering::Acquire);13244Ops[0] = LI;13245return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane");13246}13247case NEON::BI__builtin_neon_vld1_dup_v:13248case NEON::BI__builtin_neon_vld1q_dup_v: {13249Value *V = PoisonValue::get(Ty);13250Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0],13251PtrOp0.getAlignment());13252llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);13253Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);13254return EmitNeonSplat(Ops[0], CI);13255}13256case NEON::BI__builtin_neon_vst1_lane_v:13257case NEON::BI__builtin_neon_vst1q_lane_v:13258Ops[1] = Builder.CreateBitCast(Ops[1], 
Ty);13259Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);13260return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());13261case NEON::BI__builtin_neon_vstl1_lane_s64:13262case NEON::BI__builtin_neon_vstl1q_lane_s64: {13263Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13264Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);13265llvm::StoreInst *SI =13266Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment());13267SI->setAtomic(llvm::AtomicOrdering::Release);13268return SI;13269}13270case NEON::BI__builtin_neon_vld2_v:13271case NEON::BI__builtin_neon_vld2q_v: {13272llvm::Type *Tys[2] = {VTy, UnqualPtrTy};13273Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);13274Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");13275return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13276}13277case NEON::BI__builtin_neon_vld3_v:13278case NEON::BI__builtin_neon_vld3q_v: {13279llvm::Type *Tys[2] = {VTy, UnqualPtrTy};13280Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);13281Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");13282return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13283}13284case NEON::BI__builtin_neon_vld4_v:13285case NEON::BI__builtin_neon_vld4q_v: {13286llvm::Type *Tys[2] = {VTy, UnqualPtrTy};13287Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);13288Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");13289return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13290}13291case NEON::BI__builtin_neon_vld2_dup_v:13292case NEON::BI__builtin_neon_vld2q_dup_v: {13293llvm::Type *Tys[2] = {VTy, UnqualPtrTy};13294Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);13295Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");13296return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13297}13298case NEON::BI__builtin_neon_vld3_dup_v:13299case NEON::BI__builtin_neon_vld3q_dup_v: {13300llvm::Type *Tys[2] = {VTy, UnqualPtrTy};13301Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, 
Tys);13302Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");13303return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13304}13305case NEON::BI__builtin_neon_vld4_dup_v:13306case NEON::BI__builtin_neon_vld4q_dup_v: {13307llvm::Type *Tys[2] = {VTy, UnqualPtrTy};13308Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);13309Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");13310return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13311}13312case NEON::BI__builtin_neon_vld2_lane_v:13313case NEON::BI__builtin_neon_vld2q_lane_v: {13314llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };13315Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);13316std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());13317Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13318Ops[2] = Builder.CreateBitCast(Ops[2], Ty);13319Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);13320Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane");13321return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13322}13323case NEON::BI__builtin_neon_vld3_lane_v:13324case NEON::BI__builtin_neon_vld3q_lane_v: {13325llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };13326Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);13327std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());13328Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13329Ops[2] = Builder.CreateBitCast(Ops[2], Ty);13330Ops[3] = Builder.CreateBitCast(Ops[3], Ty);13331Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);13332Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane");13333return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13334}13335case NEON::BI__builtin_neon_vld4_lane_v:13336case NEON::BI__builtin_neon_vld4q_lane_v: {13337llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };13338Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);13339std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end());13340Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13341Ops[2] = 
Builder.CreateBitCast(Ops[2], Ty);13342Ops[3] = Builder.CreateBitCast(Ops[3], Ty);13343Ops[4] = Builder.CreateBitCast(Ops[4], Ty);13344Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);13345Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane");13346return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);13347}13348case NEON::BI__builtin_neon_vst2_v:13349case NEON::BI__builtin_neon_vst2q_v: {13350std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());13351llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };13352return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),13353Ops, "");13354}13355case NEON::BI__builtin_neon_vst2_lane_v:13356case NEON::BI__builtin_neon_vst2q_lane_v: {13357std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());13358Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);13359llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };13360return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),13361Ops, "");13362}13363case NEON::BI__builtin_neon_vst3_v:13364case NEON::BI__builtin_neon_vst3q_v: {13365std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());13366llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };13367return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),13368Ops, "");13369}13370case NEON::BI__builtin_neon_vst3_lane_v:13371case NEON::BI__builtin_neon_vst3q_lane_v: {13372std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());13373Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);13374llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };13375return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),13376Ops, "");13377}13378case NEON::BI__builtin_neon_vst4_v:13379case NEON::BI__builtin_neon_vst4q_v: {13380std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());13381llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };13382return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),13383Ops, "");13384}13385case NEON::BI__builtin_neon_vst4_lane_v:13386case NEON::BI__builtin_neon_vst4q_lane_v: 
{13387std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());13388Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);13389llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };13390return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),13391Ops, "");13392}13393case NEON::BI__builtin_neon_vtrn_v:13394case NEON::BI__builtin_neon_vtrnq_v: {13395Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13396Ops[2] = Builder.CreateBitCast(Ops[2], Ty);13397Value *SV = nullptr;1339813399for (unsigned vi = 0; vi != 2; ++vi) {13400SmallVector<int, 16> Indices;13401for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {13402Indices.push_back(i+vi);13403Indices.push_back(i+e+vi);13404}13405Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);13406SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");13407SV = Builder.CreateDefaultAlignedStore(SV, Addr);13408}13409return SV;13410}13411case NEON::BI__builtin_neon_vuzp_v:13412case NEON::BI__builtin_neon_vuzpq_v: {13413Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13414Ops[2] = Builder.CreateBitCast(Ops[2], Ty);13415Value *SV = nullptr;1341613417for (unsigned vi = 0; vi != 2; ++vi) {13418SmallVector<int, 16> Indices;13419for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)13420Indices.push_back(2*i+vi);1342113422Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);13423SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");13424SV = Builder.CreateDefaultAlignedStore(SV, Addr);13425}13426return SV;13427}13428case NEON::BI__builtin_neon_vzip_v:13429case NEON::BI__builtin_neon_vzipq_v: {13430Ops[1] = Builder.CreateBitCast(Ops[1], Ty);13431Ops[2] = Builder.CreateBitCast(Ops[2], Ty);13432Value *SV = nullptr;1343313434for (unsigned vi = 0; vi != 2; ++vi) {13435SmallVector<int, 16> Indices;13436for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {13437Indices.push_back((i + vi*e) >> 1);13438Indices.push_back(((i + vi*e) >> 1)+e);13439}13440Value *Addr = 
Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);13441SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");13442SV = Builder.CreateDefaultAlignedStore(SV, Addr);13443}13444return SV;13445}13446case NEON::BI__builtin_neon_vqtbl1q_v: {13447return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),13448Ops, "vtbl1");13449}13450case NEON::BI__builtin_neon_vqtbl2q_v: {13451return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),13452Ops, "vtbl2");13453}13454case NEON::BI__builtin_neon_vqtbl3q_v: {13455return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),13456Ops, "vtbl3");13457}13458case NEON::BI__builtin_neon_vqtbl4q_v: {13459return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),13460Ops, "vtbl4");13461}13462case NEON::BI__builtin_neon_vqtbx1q_v: {13463return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),13464Ops, "vtbx1");13465}13466case NEON::BI__builtin_neon_vqtbx2q_v: {13467return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),13468Ops, "vtbx2");13469}13470case NEON::BI__builtin_neon_vqtbx3q_v: {13471return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),13472Ops, "vtbx3");13473}13474case NEON::BI__builtin_neon_vqtbx4q_v: {13475return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),13476Ops, "vtbx4");13477}13478case NEON::BI__builtin_neon_vsqadd_v:13479case NEON::BI__builtin_neon_vsqaddq_v: {13480Int = Intrinsic::aarch64_neon_usqadd;13481return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");13482}13483case NEON::BI__builtin_neon_vuqadd_v:13484case NEON::BI__builtin_neon_vuqaddq_v: {13485Int = Intrinsic::aarch64_neon_suqadd;13486return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");13487}13488}13489}1349013491Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID,13492const CallExpr *E) {13493assert((BuiltinID == BPF::BI__builtin_preserve_field_info ||13494BuiltinID == BPF::BI__builtin_btf_type_id 
||13495BuiltinID == BPF::BI__builtin_preserve_type_info ||13496BuiltinID == BPF::BI__builtin_preserve_enum_value) &&13497"unexpected BPF builtin");1349813499// A sequence number, injected into IR builtin functions, to13500// prevent CSE given the only difference of the function13501// may just be the debuginfo metadata.13502static uint32_t BuiltinSeqNum;1350313504switch (BuiltinID) {13505default:13506llvm_unreachable("Unexpected BPF builtin");13507case BPF::BI__builtin_preserve_field_info: {13508const Expr *Arg = E->getArg(0);13509bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField;1351013511if (!getDebugInfo()) {13512CGM.Error(E->getExprLoc(),13513"using __builtin_preserve_field_info() without -g");13514return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this)13515: EmitLValue(Arg).emitRawPointer(*this);13516}1351713518// Enable underlying preserve_*_access_index() generation.13519bool OldIsInPreservedAIRegion = IsInPreservedAIRegion;13520IsInPreservedAIRegion = true;13521Value *FieldAddr = IsBitField ? 
EmitLValue(Arg).getRawBitFieldPointer(*this)13522: EmitLValue(Arg).emitRawPointer(*this);13523IsInPreservedAIRegion = OldIsInPreservedAIRegion;1352413525ConstantInt *C = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));13526Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue());1352713528// Built the IR for the preserve_field_info intrinsic.13529llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration(13530&CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info,13531{FieldAddr->getType()});13532return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind});13533}13534case BPF::BI__builtin_btf_type_id:13535case BPF::BI__builtin_preserve_type_info: {13536if (!getDebugInfo()) {13537CGM.Error(E->getExprLoc(), "using builtin function without -g");13538return nullptr;13539}1354013541const Expr *Arg0 = E->getArg(0);13542llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(13543Arg0->getType(), Arg0->getExprLoc());1354413545ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));13546Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());13547Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);1354813549llvm::Function *FnDecl;13550if (BuiltinID == BPF::BI__builtin_btf_type_id)13551FnDecl = llvm::Intrinsic::getDeclaration(13552&CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {});13553else13554FnDecl = llvm::Intrinsic::getDeclaration(13555&CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {});13556CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue});13557Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);13558return Fn;13559}13560case BPF::BI__builtin_preserve_enum_value: {13561if (!getDebugInfo()) {13562CGM.Error(E->getExprLoc(), "using builtin function without -g");13563return nullptr;13564}1356513566const Expr *Arg0 = E->getArg(0);13567llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType(13568Arg0->getType(), Arg0->getExprLoc());1356913570// Find 
enumerator13571const auto *UO = cast<UnaryOperator>(Arg0->IgnoreParens());13572const auto *CE = cast<CStyleCastExpr>(UO->getSubExpr());13573const auto *DR = cast<DeclRefExpr>(CE->getSubExpr());13574const auto *Enumerator = cast<EnumConstantDecl>(DR->getDecl());1357513576auto InitVal = Enumerator->getInitVal();13577std::string InitValStr;13578if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX))13579InitValStr = std::to_string(InitVal.getSExtValue());13580else13581InitValStr = std::to_string(InitVal.getZExtValue());13582std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr;13583Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr);1358413585ConstantInt *Flag = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));13586Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue());13587Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++);1358813589llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration(13590&CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {});13591CallInst *Fn =13592Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue});13593Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo);13594return Fn;13595}13596}13597}1359813599llvm::Value *CodeGenFunction::13600BuildVector(ArrayRef<llvm::Value*> Ops) {13601assert((Ops.size() & (Ops.size() - 1)) == 0 &&13602"Not a power-of-two sized vector!");13603bool AllConstants = true;13604for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)13605AllConstants &= isa<Constant>(Ops[i]);1360613607// If this is a constant vector, create a ConstantVector.13608if (AllConstants) {13609SmallVector<llvm::Constant*, 16> CstOps;13610for (unsigned i = 0, e = Ops.size(); i != e; ++i)13611CstOps.push_back(cast<Constant>(Ops[i]));13612return llvm::ConstantVector::get(CstOps);13613}1361413615// Otherwise, insertelement the values to build the vector.13616Value *Result = llvm::PoisonValue::get(13617llvm::FixedVectorType::get(Ops[0]->getType(), 
Ops.size()));1361813619for (unsigned i = 0, e = Ops.size(); i != e; ++i)13620Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i));1362113622return Result;13623}1362413625// Convert the mask from an integer type to a vector of i1.13626static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,13627unsigned NumElts) {1362813629auto *MaskTy = llvm::FixedVectorType::get(13630CGF.Builder.getInt1Ty(),13631cast<IntegerType>(Mask->getType())->getBitWidth());13632Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);1363313634// If we have less than 8 elements, then the starting mask was an i8 and13635// we need to extract down to the right number of elements.13636if (NumElts < 8) {13637int Indices[4];13638for (unsigned i = 0; i != NumElts; ++i)13639Indices[i] = i;13640MaskVec = CGF.Builder.CreateShuffleVector(13641MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract");13642}13643return MaskVec;13644}1364513646static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef<Value *> Ops,13647Align Alignment) {13648Value *Ptr = Ops[0];1364913650Value *MaskVec = getMaskVecValue(13651CGF, Ops[2],13652cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements());1365313654return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec);13655}1365613657static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef<Value *> Ops,13658Align Alignment) {13659llvm::Type *Ty = Ops[1]->getType();13660Value *Ptr = Ops[0];1366113662Value *MaskVec = getMaskVecValue(13663CGF, Ops[2], cast<llvm::FixedVectorType>(Ty)->getNumElements());1366413665return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]);13666}1366713668static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,13669ArrayRef<Value *> Ops) {13670auto *ResultTy = cast<llvm::VectorType>(Ops[1]->getType());13671Value *Ptr = Ops[0];1367213673Value *MaskVec = getMaskVecValue(13674CGF, Ops[2], cast<FixedVectorType>(ResultTy)->getNumElements());1367513676llvm::Function *F = 
CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,13677ResultTy);13678return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });13679}1368013681static Value *EmitX86CompressExpand(CodeGenFunction &CGF,13682ArrayRef<Value *> Ops,13683bool IsCompress) {13684auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());1368513686Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());1368713688Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress13689: Intrinsic::x86_avx512_mask_expand;13690llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);13691return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });13692}1369313694static Value *EmitX86CompressStore(CodeGenFunction &CGF,13695ArrayRef<Value *> Ops) {13696auto *ResultTy = cast<llvm::FixedVectorType>(Ops[1]->getType());13697Value *Ptr = Ops[0];1369813699Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements());1370013701llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,13702ResultTy);13703return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });13704}1370513706static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,13707ArrayRef<Value *> Ops,13708bool InvertLHS = false) {13709unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();13710Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);13711Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);1371213713if (InvertLHS)13714LHS = CGF.Builder.CreateNot(LHS);1371513716return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),13717Ops[0]->getType());13718}1371913720static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,13721Value *Amt, bool IsRight) {13722llvm::Type *Ty = Op0->getType();1372313724// Amount may be scalar immediate, in which case create a splat vector.13725// Funnel shifts amounts are treated as modulo and types are all power-of-2 so13726// we only care about the lowest log2 bits anyway.13727if (Amt->getType() != 
Ty) {13728unsigned NumElts = cast<llvm::FixedVectorType>(Ty)->getNumElements();13729Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);13730Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);13731}1373213733unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;13734Function *F = CGF.CGM.getIntrinsic(IID, Ty);13735return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});13736}1373713738static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,13739bool IsSigned) {13740Value *Op0 = Ops[0];13741Value *Op1 = Ops[1];13742llvm::Type *Ty = Op0->getType();13743uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;1374413745CmpInst::Predicate Pred;13746switch (Imm) {13747case 0x0:13748Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;13749break;13750case 0x1:13751Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;13752break;13753case 0x2:13754Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;13755break;13756case 0x3:13757Pred = IsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;13758break;13759case 0x4:13760Pred = ICmpInst::ICMP_EQ;13761break;13762case 0x5:13763Pred = ICmpInst::ICMP_NE;13764break;13765case 0x6:13766return llvm::Constant::getNullValue(Ty); // FALSE13767case 0x7:13768return llvm::Constant::getAllOnesValue(Ty); // TRUE13769default:13770llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");13771}1377213773Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);13774Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);13775return Res;13776}1377713778static Value *EmitX86Select(CodeGenFunction &CGF,13779Value *Mask, Value *Op0, Value *Op1) {1378013781// If the mask is all ones just return first argument.13782if (const auto *C = dyn_cast<Constant>(Mask))13783if (C->isAllOnesValue())13784return Op0;1378513786Mask = getMaskVecValue(13787CGF, Mask, cast<llvm::FixedVectorType>(Op0->getType())->getNumElements());1378813789return CGF.Builder.CreateSelect(Mask, Op0, Op1);13790}1379113792static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,13793Value *Mask, Value *Op0, Value *Op1) {13794// If the mask is all ones just return first argument.13795if (const auto *C = dyn_cast<Constant>(Mask))13796if (C->isAllOnesValue())13797return Op0;1379813799auto *MaskTy = llvm::FixedVectorType::get(13800CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());13801Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);13802Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);13803return CGF.Builder.CreateSelect(Mask, Op0, Op1);13804}1380513806static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,13807unsigned NumElts, Value *MaskIn) {13808if (MaskIn) {13809const auto *C = dyn_cast<Constant>(MaskIn);13810if (!C || !C->isAllOnesValue())13811Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));13812}1381313814if (NumElts < 8) {13815int Indices[8];13816for (unsigned i = 0; i != NumElts; ++i)13817Indices[i] = i;13818for (unsigned i = NumElts; i != 8; ++i)13819Indices[i] = i % 
NumElts + NumElts;13820Cmp = CGF.Builder.CreateShuffleVector(13821Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);13822}1382313824return CGF.Builder.CreateBitCast(Cmp,13825IntegerType::get(CGF.getLLVMContext(),13826std::max(NumElts, 8U)));13827}1382813829static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,13830bool Signed, ArrayRef<Value *> Ops) {13831assert((Ops.size() == 2 || Ops.size() == 4) &&13832"Unexpected number of arguments");13833unsigned NumElts =13834cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();13835Value *Cmp;1383613837if (CC == 3) {13838Cmp = Constant::getNullValue(13839llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));13840} else if (CC == 7) {13841Cmp = Constant::getAllOnesValue(13842llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));13843} else {13844ICmpInst::Predicate Pred;13845switch (CC) {13846default: llvm_unreachable("Unknown condition code");13847case 0: Pred = ICmpInst::ICMP_EQ; break;13848case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;13849case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;13850case 4: Pred = ICmpInst::ICMP_NE; break;13851case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;13852case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;13853}13854Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);13855}1385613857Value *MaskIn = nullptr;13858if (Ops.size() == 4)13859MaskIn = Ops[3];1386013861return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);13862}1386313864static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {13865Value *Zero = Constant::getNullValue(In->getType());13866return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });13867}1386813869static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E,13870ArrayRef<Value *> Ops, bool IsSigned) {13871unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();13872llvm::Type *Ty = Ops[1]->getType();1387313874Value *Res;13875if (Rnd != 4) {13876Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round13877: Intrinsic::x86_avx512_uitofp_round;13878Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });13879Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });13880} else {13881CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);13882Res = IsSigned ? 
CGF.Builder.CreateSIToFP(Ops[0], Ty)13883: CGF.Builder.CreateUIToFP(Ops[0], Ty);13884}1388513886return EmitX86Select(CGF, Ops[2], Res, Ops[1]);13887}1388813889// Lowers X86 FMA intrinsics to IR.13890static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,13891ArrayRef<Value *> Ops, unsigned BuiltinID,13892bool IsAddSub) {1389313894bool Subtract = false;13895Intrinsic::ID IID = Intrinsic::not_intrinsic;13896switch (BuiltinID) {13897default: break;13898case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:13899Subtract = true;13900[[fallthrough]];13901case clang::X86::BI__builtin_ia32_vfmaddph512_mask:13902case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:13903case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:13904IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512;13905break;13906case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:13907Subtract = true;13908[[fallthrough]];13909case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:13910case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:13911case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:13912IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512;13913break;13914case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:13915Subtract = true;13916[[fallthrough]];13917case clang::X86::BI__builtin_ia32_vfmaddps512_mask:13918case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:13919case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:13920IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;13921case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:13922Subtract = true;13923[[fallthrough]];13924case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:13925case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:13926case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:13927IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;13928case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:13929Subtract = true;13930[[fallthrough]];13931case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:13932case 
clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:13933case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:13934IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;13935break;13936case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:13937Subtract = true;13938[[fallthrough]];13939case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:13940case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:13941case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:13942IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;13943break;13944}1394513946Value *A = Ops[0];13947Value *B = Ops[1];13948Value *C = Ops[2];1394913950if (Subtract)13951C = CGF.Builder.CreateFNeg(C);1395213953Value *Res;1395413955// Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).13956if (IID != Intrinsic::not_intrinsic &&13957(cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||13958IsAddSub)) {13959Function *Intr = CGF.CGM.getIntrinsic(IID);13960Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });13961} else {13962llvm::Type *Ty = A->getType();13963Function *FMA;13964if (CGF.Builder.getIsFPConstrained()) {13965CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);13966FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty);13967Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C});13968} else {13969FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);13970Res = CGF.Builder.CreateCall(FMA, {A, B, C});13971}13972}1397313974// Handle any required masking.13975Value *MaskFalseVal = nullptr;13976switch (BuiltinID) {13977case clang::X86::BI__builtin_ia32_vfmaddph512_mask:13978case clang::X86::BI__builtin_ia32_vfmaddps512_mask:13979case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:13980case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:13981case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:13982case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:13983MaskFalseVal = Ops[0];13984break;13985case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:13986case 
clang::X86::BI__builtin_ia32_vfmaddps512_maskz:13987case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:13988case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:13989case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:13990case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:13991MaskFalseVal = Constant::getNullValue(Ops[0]->getType());13992break;13993case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:13994case clang::X86::BI__builtin_ia32_vfmaddph512_mask3:13995case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:13996case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:13997case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:13998case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:13999case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3:14000case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3:14001case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:14002case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:14003case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:14004case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:14005MaskFalseVal = Ops[2];14006break;14007}1400814009if (MaskFalseVal)14010return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);1401114012return Res;14013}1401414015static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E,14016MutableArrayRef<Value *> Ops, Value *Upper,14017bool ZeroMask = false, unsigned PTIdx = 0,14018bool NegAcc = false) {14019unsigned Rnd = 4;14020if (Ops.size() > 4)14021Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();1402214023if (NegAcc)14024Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);1402514026Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);14027Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);14028Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);14029Value *Res;14030if (Rnd != 4) {14031Intrinsic::ID IID;1403214033switch (Ops[0]->getType()->getPrimitiveSizeInBits()) {14034case 16:14035IID = 
Intrinsic::x86_avx512fp16_vfmadd_f16;14036break;14037case 32:14038IID = Intrinsic::x86_avx512_vfmadd_f32;14039break;14040case 64:14041IID = Intrinsic::x86_avx512_vfmadd_f64;14042break;14043default:14044llvm_unreachable("Unexpected size");14045}14046Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),14047{Ops[0], Ops[1], Ops[2], Ops[4]});14048} else if (CGF.Builder.getIsFPConstrained()) {14049CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E);14050Function *FMA = CGF.CGM.getIntrinsic(14051Intrinsic::experimental_constrained_fma, Ops[0]->getType());14052Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3));14053} else {14054Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());14055Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));14056}14057// If we have more than 3 arguments, we need to do masking.14058if (Ops.size() > 3) {14059Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())14060: Ops[PTIdx];1406114062// If we negated the accumulator and the its the PassThru value we need to14063// bypass the negate. 
Conveniently Upper should be the same thing in this14064// case.14065if (NegAcc && PTIdx == 2)14066PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);1406714068Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);14069}14070return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);14071}1407214073static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,14074ArrayRef<Value *> Ops) {14075llvm::Type *Ty = Ops[0]->getType();14076// Arguments have a vXi32 type so cast to vXi64.14077Ty = llvm::FixedVectorType::get(CGF.Int64Ty,14078Ty->getPrimitiveSizeInBits() / 64);14079Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);14080Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);1408114082if (IsSigned) {14083// Shift left then arithmetic shift right.14084Constant *ShiftAmt = ConstantInt::get(Ty, 32);14085LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);14086LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);14087RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);14088RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);14089} else {14090// Clear the upper bits.14091Constant *Mask = ConstantInt::get(Ty, 0xffffffff);14092LHS = CGF.Builder.CreateAnd(LHS, Mask);14093RHS = CGF.Builder.CreateAnd(RHS, Mask);14094}1409514096return CGF.Builder.CreateMul(LHS, RHS);14097}1409814099// Emit a masked pternlog intrinsic. 
This only exists because the header has to14100// use a macro and we aren't able to pass the input argument to a pternlog14101// builtin and a select builtin without evaluating it twice.14102static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,14103ArrayRef<Value *> Ops) {14104llvm::Type *Ty = Ops[0]->getType();1410514106unsigned VecWidth = Ty->getPrimitiveSizeInBits();14107unsigned EltWidth = Ty->getScalarSizeInBits();14108Intrinsic::ID IID;14109if (VecWidth == 128 && EltWidth == 32)14110IID = Intrinsic::x86_avx512_pternlog_d_128;14111else if (VecWidth == 256 && EltWidth == 32)14112IID = Intrinsic::x86_avx512_pternlog_d_256;14113else if (VecWidth == 512 && EltWidth == 32)14114IID = Intrinsic::x86_avx512_pternlog_d_512;14115else if (VecWidth == 128 && EltWidth == 64)14116IID = Intrinsic::x86_avx512_pternlog_q_128;14117else if (VecWidth == 256 && EltWidth == 64)14118IID = Intrinsic::x86_avx512_pternlog_q_256;14119else if (VecWidth == 512 && EltWidth == 64)14120IID = Intrinsic::x86_avx512_pternlog_q_512;14121else14122llvm_unreachable("Unexpected intrinsic");1412314124Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),14125Ops.drop_back());14126Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(Ty) : Ops[0];14127return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);14128}1412914130static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,14131llvm::Type *DstTy) {14132unsigned NumberOfElements =14133cast<llvm::FixedVectorType>(DstTy)->getNumElements();14134Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);14135return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");14136}1413714138Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {14139const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();14140StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();14141return EmitX86CpuIs(CPUStr);14142}1414314144// Convert F16 halfs to floats.14145static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,14146ArrayRef<Value *> Ops,14147llvm::Type *DstTy) {14148assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) &&14149"Unknown cvtph2ps intrinsic");1415014151// If the SAE intrinsic doesn't use default rounding then we can't upgrade.14152if (Ops.size() == 4 && cast<llvm::ConstantInt>(Ops[3])->getZExtValue() != 4) {14153Function *F =14154CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512);14155return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]});14156}1415714158unsigned NumDstElts = cast<llvm::FixedVectorType>(DstTy)->getNumElements();14159Value *Src = Ops[0];1416014161// Extract the subvector.14162if (NumDstElts !=14163cast<llvm::FixedVectorType>(Src->getType())->getNumElements()) {14164assert(NumDstElts == 4 && "Unexpected vector size");14165Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef<int>{0, 1, 2, 3});14166}1416714168// Bitcast from vXi16 to vXf16.14169auto *HalfTy = llvm::FixedVectorType::get(14170llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);14171Src = CGF.Builder.CreateBitCast(Src, HalfTy);1417214173// Perform the fp-extension.14174Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps");1417514176if (Ops.size() >= 3)14177Res = EmitX86Select(CGF, Ops[2], 
Res, Ops[1]);14178return Res;14179}1418014181Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {1418214183llvm::Type *Int32Ty = Builder.getInt32Ty();1418414185// Matching the struct layout from the compiler-rt/libgcc structure that is14186// filled in:14187// unsigned int __cpu_vendor;14188// unsigned int __cpu_type;14189// unsigned int __cpu_subtype;14190// unsigned int __cpu_features[1];14191llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,14192llvm::ArrayType::get(Int32Ty, 1));1419314194// Grab the global __cpu_model.14195llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");14196cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);1419714198// Calculate the index needed to access the correct field based on the14199// range. Also adjust the expected value.14200unsigned Index;14201unsigned Value;14202std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)14203#define X86_VENDOR(ENUM, STRING) \14204.Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})14205#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \14206.Case(ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})14207#define X86_CPU_TYPE(ENUM, STR) \14208.Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})14209#define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \14210.Case(ALIAS, {2u, static_cast<unsigned>(llvm::X86::ENUM)})14211#define X86_CPU_SUBTYPE(ENUM, STR) \14212.Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})14213#include "llvm/TargetParser/X86TargetParser.def"14214.Default({0, 0});14215assert(Value != 0 && "Invalid CPUStr passed to CpuIs");1421614217// Grab the appropriate field from __cpu_model.14218llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),14219ConstantInt::get(Int32Ty, Index)};14220llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);14221CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue,14222CharUnits::fromQuantity(4));1422314224// Check the value of the field against the requested value.14225return 
Builder.CreateICmpEQ(CpuValue,14226llvm::ConstantInt::get(Int32Ty, Value));14227}1422814229Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {14230const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();14231StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();14232if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr))14233return Builder.getFalse();14234return EmitX86CpuSupports(FeatureStr);14235}1423614237Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {14238return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs));14239}1424014241llvm::Value *14242CodeGenFunction::EmitX86CpuSupports(std::array<uint32_t, 4> FeatureMask) {14243Value *Result = Builder.getTrue();14244if (FeatureMask[0] != 0) {14245// Matching the struct layout from the compiler-rt/libgcc structure that is14246// filled in:14247// unsigned int __cpu_vendor;14248// unsigned int __cpu_type;14249// unsigned int __cpu_subtype;14250// unsigned int __cpu_features[1];14251llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,14252llvm::ArrayType::get(Int32Ty, 1));1425314254// Grab the global __cpu_model.14255llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");14256cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);1425714258// Grab the first (0th) element from the field __cpu_features off of the14259// global in the struct STy.14260Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),14261Builder.getInt32(0)};14262Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs);14263Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures,14264CharUnits::fromQuantity(4));1426514266// Check the value of the bit corresponding to the feature requested.14267Value *Mask = Builder.getInt32(FeatureMask[0]);14268Value *Bitset = Builder.CreateAnd(Features, Mask);14269Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);14270Result = Builder.CreateAnd(Result, Cmp);14271}1427214273llvm::Type 
*ATy = llvm::ArrayType::get(Int32Ty, 3);14274llvm::Constant *CpuFeatures2 =14275CGM.CreateRuntimeVariable(ATy, "__cpu_features2");14276cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);14277for (int i = 1; i != 4; ++i) {14278const uint32_t M = FeatureMask[i];14279if (!M)14280continue;14281Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)};14282Value *Features = Builder.CreateAlignedLoad(14283Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs),14284CharUnits::fromQuantity(4));14285// Check the value of the bit corresponding to the feature requested.14286Value *Mask = Builder.getInt32(M);14287Value *Bitset = Builder.CreateAnd(Features, Mask);14288Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);14289Result = Builder.CreateAnd(Result, Cmp);14290}1429114292return Result;14293}1429414295Value *CodeGenFunction::EmitAArch64CpuInit() {14296llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);14297llvm::FunctionCallee Func =14298CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver");14299cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);14300cast<llvm::GlobalValue>(Func.getCallee())14301->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);14302return Builder.CreateCall(Func);14303}1430414305Value *CodeGenFunction::EmitX86CpuInit() {14306llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,14307/*Variadic*/ false);14308llvm::FunctionCallee Func =14309CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");14310cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);14311cast<llvm::GlobalValue>(Func.getCallee())14312->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);14313return Builder.CreateCall(Func);14314}1431514316Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {14317const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts();14318StringRef ArgStr = cast<StringLiteral>(ArgExpr)->getString();14319llvm::SmallVector<StringRef, 8> Features;14320ArgStr.split(Features, "+");14321for 
(auto &Feature : Features) {14322Feature = Feature.trim();14323if (!llvm::AArch64::parseFMVExtension(Feature))14324return Builder.getFalse();14325if (Feature != "default")14326Features.push_back(Feature);14327}14328return EmitAArch64CpuSupports(Features);14329}1433014331llvm::Value *14332CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {14333uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);14334Value *Result = Builder.getTrue();14335if (FeaturesMask != 0) {14336// Get features from structure in runtime library14337// struct {14338// unsigned long long features;14339// } __aarch64_cpu_features;14340llvm::Type *STy = llvm::StructType::get(Int64Ty);14341llvm::Constant *AArch64CPUFeatures =14342CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features");14343cast<llvm::GlobalValue>(AArch64CPUFeatures)->setDSOLocal(true);14344llvm::Value *CpuFeatures = Builder.CreateGEP(14345STy, AArch64CPUFeatures,14346{ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});14347Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,14348CharUnits::fromQuantity(8));14349Value *Mask = Builder.getInt64(FeaturesMask);14350Value *Bitset = Builder.CreateAnd(Features, Mask);14351Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);14352Result = Builder.CreateAnd(Result, Cmp);14353}14354return Result;14355}1435614357Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,14358const CallExpr *E) {14359if (BuiltinID == Builtin::BI__builtin_cpu_is)14360return EmitX86CpuIs(E);14361if (BuiltinID == Builtin::BI__builtin_cpu_supports)14362return EmitX86CpuSupports(E);14363if (BuiltinID == Builtin::BI__builtin_cpu_init)14364return EmitX86CpuInit();1436514366// Handle MSVC intrinsics before argument evaluation to prevent double14367// evaluation.14368if (std::optional<MSVCIntrin> MsvcIntId = translateX86ToMsvcIntrin(BuiltinID))14369return EmitMSVCBuiltinExpr(*MsvcIntId, E);1437014371SmallVector<Value*, 4> Ops;14372bool IsMaskFCmp = 
false;14373bool IsConjFMA = false;1437414375// Find out if any arguments are required to be integer constant expressions.14376unsigned ICEArguments = 0;14377ASTContext::GetBuiltinTypeError Error;14378getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);14379assert(Error == ASTContext::GE_None && "Should not codegen an error");1438014381for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {14382Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));14383}1438414385// These exist so that the builtin that takes an immediate can be bounds14386// checked by clang to avoid passing bad immediates to the backend. Since14387// AVX has a larger immediate than SSE we would need separate builtins to14388// do the different bounds checking. Rather than create a clang specific14389// SSE only builtin, this implements eight separate builtins to match gcc14390// implementation.14391auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {14392Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));14393llvm::Function *F = CGM.getIntrinsic(ID);14394return Builder.CreateCall(F, Ops);14395};1439614397// For the vector forms of FP comparisons, translate the builtins directly to14398// IR.14399// TODO: The builtins could be removed if the SSE header files used vector14400// extension comparisons directly (vector ordered/unordered may need14401// additional support via __builtin_isnan()).14402auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred,14403bool IsSignaling) {14404CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);14405Value *Cmp;14406if (IsSignaling)14407Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);14408else14409Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);14410llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());14411llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);14412Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);14413return Builder.CreateBitCast(Sext, FPVecTy);14414};1441514416switch 
(BuiltinID) {14417default: return nullptr;14418case X86::BI_mm_prefetch: {14419Value *Address = Ops[0];14420ConstantInt *C = cast<ConstantInt>(Ops[1]);14421Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);14422Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);14423Value *Data = ConstantInt::get(Int32Ty, 1);14424Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());14425return Builder.CreateCall(F, {Address, RW, Locality, Data});14426}14427case X86::BI_mm_clflush: {14428return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),14429Ops[0]);14430}14431case X86::BI_mm_lfence: {14432return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));14433}14434case X86::BI_mm_mfence: {14435return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));14436}14437case X86::BI_mm_sfence: {14438return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));14439}14440case X86::BI_mm_pause: {14441return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));14442}14443case X86::BI__rdtsc: {14444return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));14445}14446case X86::BI__builtin_ia32_rdtscp: {14447Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));14448Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),14449Ops[0]);14450return Builder.CreateExtractValue(Call, 0);14451}14452case X86::BI__builtin_ia32_lzcnt_u16:14453case X86::BI__builtin_ia32_lzcnt_u32:14454case X86::BI__builtin_ia32_lzcnt_u64: {14455Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());14456return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});14457}14458case X86::BI__builtin_ia32_tzcnt_u16:14459case X86::BI__builtin_ia32_tzcnt_u32:14460case X86::BI__builtin_ia32_tzcnt_u64: {14461Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());14462return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});14463}14464case 
X86::BI__builtin_ia32_undef128:14465case X86::BI__builtin_ia32_undef256:14466case X86::BI__builtin_ia32_undef512:14467// The x86 definition of "undef" is not the same as the LLVM definition14468// (PR32176). We leave optimizing away an unnecessary zero constant to the14469// IR optimizer and backend.14470// TODO: If we had a "freeze" IR instruction to generate a fixed undef14471// value, we should use that here instead of a zero.14472return llvm::Constant::getNullValue(ConvertType(E->getType()));14473case X86::BI__builtin_ia32_vec_init_v8qi:14474case X86::BI__builtin_ia32_vec_init_v4hi:14475case X86::BI__builtin_ia32_vec_init_v2si:14476return Builder.CreateBitCast(BuildVector(Ops),14477llvm::Type::getX86_MMXTy(getLLVMContext()));14478case X86::BI__builtin_ia32_vec_ext_v2si:14479case X86::BI__builtin_ia32_vec_ext_v16qi:14480case X86::BI__builtin_ia32_vec_ext_v8hi:14481case X86::BI__builtin_ia32_vec_ext_v4si:14482case X86::BI__builtin_ia32_vec_ext_v4sf:14483case X86::BI__builtin_ia32_vec_ext_v2di:14484case X86::BI__builtin_ia32_vec_ext_v32qi:14485case X86::BI__builtin_ia32_vec_ext_v16hi:14486case X86::BI__builtin_ia32_vec_ext_v8si:14487case X86::BI__builtin_ia32_vec_ext_v4di: {14488unsigned NumElts =14489cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();14490uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();14491Index &= NumElts - 1;14492// These builtins exist so we can ensure the index is an ICE and in range.14493// Otherwise we could just do this in the header file.14494return Builder.CreateExtractElement(Ops[0], Index);14495}14496case X86::BI__builtin_ia32_vec_set_v16qi:14497case X86::BI__builtin_ia32_vec_set_v8hi:14498case X86::BI__builtin_ia32_vec_set_v4si:14499case X86::BI__builtin_ia32_vec_set_v2di:14500case X86::BI__builtin_ia32_vec_set_v32qi:14501case X86::BI__builtin_ia32_vec_set_v16hi:14502case X86::BI__builtin_ia32_vec_set_v8si:14503case X86::BI__builtin_ia32_vec_set_v4di: {14504unsigned NumElts 
=14505cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();14506unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();14507Index &= NumElts - 1;14508// These builtins exist so we can ensure the index is an ICE and in range.14509// Otherwise we could just do this in the header file.14510return Builder.CreateInsertElement(Ops[0], Ops[1], Index);14511}14512case X86::BI_mm_setcsr:14513case X86::BI__builtin_ia32_ldmxcsr: {14514RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType());14515Builder.CreateStore(Ops[0], Tmp);14516return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),14517Tmp.getPointer());14518}14519case X86::BI_mm_getcsr:14520case X86::BI__builtin_ia32_stmxcsr: {14521RawAddress Tmp = CreateMemTemp(E->getType());14522Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),14523Tmp.getPointer());14524return Builder.CreateLoad(Tmp, "stmxcsr");14525}14526case X86::BI__builtin_ia32_xsave:14527case X86::BI__builtin_ia32_xsave64:14528case X86::BI__builtin_ia32_xrstor:14529case X86::BI__builtin_ia32_xrstor64:14530case X86::BI__builtin_ia32_xsaveopt:14531case X86::BI__builtin_ia32_xsaveopt64:14532case X86::BI__builtin_ia32_xrstors:14533case X86::BI__builtin_ia32_xrstors64:14534case X86::BI__builtin_ia32_xsavec:14535case X86::BI__builtin_ia32_xsavec64:14536case X86::BI__builtin_ia32_xsaves:14537case X86::BI__builtin_ia32_xsaves64:14538case X86::BI__builtin_ia32_xsetbv:14539case X86::BI_xsetbv: {14540Intrinsic::ID ID;14541#define INTRINSIC_X86_XSAVE_ID(NAME) \14542case X86::BI__builtin_ia32_##NAME: \14543ID = Intrinsic::x86_##NAME; \14544break14545switch (BuiltinID) {14546default: llvm_unreachable("Unsupported 
intrinsic!");14547INTRINSIC_X86_XSAVE_ID(xsave);14548INTRINSIC_X86_XSAVE_ID(xsave64);14549INTRINSIC_X86_XSAVE_ID(xrstor);14550INTRINSIC_X86_XSAVE_ID(xrstor64);14551INTRINSIC_X86_XSAVE_ID(xsaveopt);14552INTRINSIC_X86_XSAVE_ID(xsaveopt64);14553INTRINSIC_X86_XSAVE_ID(xrstors);14554INTRINSIC_X86_XSAVE_ID(xrstors64);14555INTRINSIC_X86_XSAVE_ID(xsavec);14556INTRINSIC_X86_XSAVE_ID(xsavec64);14557INTRINSIC_X86_XSAVE_ID(xsaves);14558INTRINSIC_X86_XSAVE_ID(xsaves64);14559INTRINSIC_X86_XSAVE_ID(xsetbv);14560case X86::BI_xsetbv:14561ID = Intrinsic::x86_xsetbv;14562break;14563}14564#undef INTRINSIC_X86_XSAVE_ID14565Value *Mhi = Builder.CreateTrunc(14566Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);14567Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);14568Ops[1] = Mhi;14569Ops.push_back(Mlo);14570return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);14571}14572case X86::BI__builtin_ia32_xgetbv:14573case X86::BI_xgetbv:14574return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);14575case X86::BI__builtin_ia32_storedqudi128_mask:14576case X86::BI__builtin_ia32_storedqusi128_mask:14577case X86::BI__builtin_ia32_storedquhi128_mask:14578case X86::BI__builtin_ia32_storedquqi128_mask:14579case X86::BI__builtin_ia32_storeupd128_mask:14580case X86::BI__builtin_ia32_storeups128_mask:14581case X86::BI__builtin_ia32_storedqudi256_mask:14582case X86::BI__builtin_ia32_storedqusi256_mask:14583case X86::BI__builtin_ia32_storedquhi256_mask:14584case X86::BI__builtin_ia32_storedquqi256_mask:14585case X86::BI__builtin_ia32_storeupd256_mask:14586case X86::BI__builtin_ia32_storeups256_mask:14587case X86::BI__builtin_ia32_storedqudi512_mask:14588case X86::BI__builtin_ia32_storedqusi512_mask:14589case X86::BI__builtin_ia32_storedquhi512_mask:14590case X86::BI__builtin_ia32_storedquqi512_mask:14591case X86::BI__builtin_ia32_storeupd512_mask:14592case X86::BI__builtin_ia32_storeups512_mask:14593return EmitX86MaskedStore(*this, Ops, Align(1));1459414595case 
X86::BI__builtin_ia32_storesh128_mask:14596case X86::BI__builtin_ia32_storess128_mask:14597case X86::BI__builtin_ia32_storesd128_mask:14598return EmitX86MaskedStore(*this, Ops, Align(1));1459914600case X86::BI__builtin_ia32_vpopcntb_128:14601case X86::BI__builtin_ia32_vpopcntd_128:14602case X86::BI__builtin_ia32_vpopcntq_128:14603case X86::BI__builtin_ia32_vpopcntw_128:14604case X86::BI__builtin_ia32_vpopcntb_256:14605case X86::BI__builtin_ia32_vpopcntd_256:14606case X86::BI__builtin_ia32_vpopcntq_256:14607case X86::BI__builtin_ia32_vpopcntw_256:14608case X86::BI__builtin_ia32_vpopcntb_512:14609case X86::BI__builtin_ia32_vpopcntd_512:14610case X86::BI__builtin_ia32_vpopcntq_512:14611case X86::BI__builtin_ia32_vpopcntw_512: {14612llvm::Type *ResultType = ConvertType(E->getType());14613llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);14614return Builder.CreateCall(F, Ops);14615}14616case X86::BI__builtin_ia32_cvtmask2b128:14617case X86::BI__builtin_ia32_cvtmask2b256:14618case X86::BI__builtin_ia32_cvtmask2b512:14619case X86::BI__builtin_ia32_cvtmask2w128:14620case X86::BI__builtin_ia32_cvtmask2w256:14621case X86::BI__builtin_ia32_cvtmask2w512:14622case X86::BI__builtin_ia32_cvtmask2d128:14623case X86::BI__builtin_ia32_cvtmask2d256:14624case X86::BI__builtin_ia32_cvtmask2d512:14625case X86::BI__builtin_ia32_cvtmask2q128:14626case X86::BI__builtin_ia32_cvtmask2q256:14627case X86::BI__builtin_ia32_cvtmask2q512:14628return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));1462914630case X86::BI__builtin_ia32_cvtb2mask128:14631case X86::BI__builtin_ia32_cvtb2mask256:14632case X86::BI__builtin_ia32_cvtb2mask512:14633case X86::BI__builtin_ia32_cvtw2mask128:14634case X86::BI__builtin_ia32_cvtw2mask256:14635case X86::BI__builtin_ia32_cvtw2mask512:14636case X86::BI__builtin_ia32_cvtd2mask128:14637case X86::BI__builtin_ia32_cvtd2mask256:14638case X86::BI__builtin_ia32_cvtd2mask512:14639case X86::BI__builtin_ia32_cvtq2mask128:14640case 
X86::BI__builtin_ia32_cvtq2mask256:14641case X86::BI__builtin_ia32_cvtq2mask512:14642return EmitX86ConvertToMask(*this, Ops[0]);1464314644case X86::BI__builtin_ia32_cvtdq2ps512_mask:14645case X86::BI__builtin_ia32_cvtqq2ps512_mask:14646case X86::BI__builtin_ia32_cvtqq2pd512_mask:14647case X86::BI__builtin_ia32_vcvtw2ph512_mask:14648case X86::BI__builtin_ia32_vcvtdq2ph512_mask:14649case X86::BI__builtin_ia32_vcvtqq2ph512_mask:14650return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);14651case X86::BI__builtin_ia32_cvtudq2ps512_mask:14652case X86::BI__builtin_ia32_cvtuqq2ps512_mask:14653case X86::BI__builtin_ia32_cvtuqq2pd512_mask:14654case X86::BI__builtin_ia32_vcvtuw2ph512_mask:14655case X86::BI__builtin_ia32_vcvtudq2ph512_mask:14656case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:14657return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);1465814659case X86::BI__builtin_ia32_vfmaddss3:14660case X86::BI__builtin_ia32_vfmaddsd3:14661case X86::BI__builtin_ia32_vfmaddsh3_mask:14662case X86::BI__builtin_ia32_vfmaddss3_mask:14663case X86::BI__builtin_ia32_vfmaddsd3_mask:14664return EmitScalarFMAExpr(*this, E, Ops, Ops[0]);14665case X86::BI__builtin_ia32_vfmaddss:14666case X86::BI__builtin_ia32_vfmaddsd:14667return EmitScalarFMAExpr(*this, E, Ops,14668Constant::getNullValue(Ops[0]->getType()));14669case X86::BI__builtin_ia32_vfmaddsh3_maskz:14670case X86::BI__builtin_ia32_vfmaddss3_maskz:14671case X86::BI__builtin_ia32_vfmaddsd3_maskz:14672return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true);14673case X86::BI__builtin_ia32_vfmaddsh3_mask3:14674case X86::BI__builtin_ia32_vfmaddss3_mask3:14675case X86::BI__builtin_ia32_vfmaddsd3_mask3:14676return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2);14677case X86::BI__builtin_ia32_vfmsubsh3_mask3:14678case X86::BI__builtin_ia32_vfmsubss3_mask3:14679case X86::BI__builtin_ia32_vfmsubsd3_mask3:14680return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2,14681/*NegAcc*/ 
true);14682case X86::BI__builtin_ia32_vfmaddph:14683case X86::BI__builtin_ia32_vfmaddps:14684case X86::BI__builtin_ia32_vfmaddpd:14685case X86::BI__builtin_ia32_vfmaddph256:14686case X86::BI__builtin_ia32_vfmaddps256:14687case X86::BI__builtin_ia32_vfmaddpd256:14688case X86::BI__builtin_ia32_vfmaddph512_mask:14689case X86::BI__builtin_ia32_vfmaddph512_maskz:14690case X86::BI__builtin_ia32_vfmaddph512_mask3:14691case X86::BI__builtin_ia32_vfmaddps512_mask:14692case X86::BI__builtin_ia32_vfmaddps512_maskz:14693case X86::BI__builtin_ia32_vfmaddps512_mask3:14694case X86::BI__builtin_ia32_vfmsubps512_mask3:14695case X86::BI__builtin_ia32_vfmaddpd512_mask:14696case X86::BI__builtin_ia32_vfmaddpd512_maskz:14697case X86::BI__builtin_ia32_vfmaddpd512_mask3:14698case X86::BI__builtin_ia32_vfmsubpd512_mask3:14699case X86::BI__builtin_ia32_vfmsubph512_mask3:14700return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);14701case X86::BI__builtin_ia32_vfmaddsubph512_mask:14702case X86::BI__builtin_ia32_vfmaddsubph512_maskz:14703case X86::BI__builtin_ia32_vfmaddsubph512_mask3:14704case X86::BI__builtin_ia32_vfmsubaddph512_mask3:14705case X86::BI__builtin_ia32_vfmaddsubps512_mask:14706case X86::BI__builtin_ia32_vfmaddsubps512_maskz:14707case X86::BI__builtin_ia32_vfmaddsubps512_mask3:14708case X86::BI__builtin_ia32_vfmsubaddps512_mask3:14709case X86::BI__builtin_ia32_vfmaddsubpd512_mask:14710case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:14711case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:14712case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:14713return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true);1471414715case X86::BI__builtin_ia32_movdqa32store128_mask:14716case X86::BI__builtin_ia32_movdqa64store128_mask:14717case X86::BI__builtin_ia32_storeaps128_mask:14718case X86::BI__builtin_ia32_storeapd128_mask:14719case X86::BI__builtin_ia32_movdqa32store256_mask:14720case X86::BI__builtin_ia32_movdqa64store256_mask:14721case 
X86::BI__builtin_ia32_storeaps256_mask:14722case X86::BI__builtin_ia32_storeapd256_mask:14723case X86::BI__builtin_ia32_movdqa32store512_mask:14724case X86::BI__builtin_ia32_movdqa64store512_mask:14725case X86::BI__builtin_ia32_storeaps512_mask:14726case X86::BI__builtin_ia32_storeapd512_mask:14727return EmitX86MaskedStore(14728*this, Ops,14729getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());1473014731case X86::BI__builtin_ia32_loadups128_mask:14732case X86::BI__builtin_ia32_loadups256_mask:14733case X86::BI__builtin_ia32_loadups512_mask:14734case X86::BI__builtin_ia32_loadupd128_mask:14735case X86::BI__builtin_ia32_loadupd256_mask:14736case X86::BI__builtin_ia32_loadupd512_mask:14737case X86::BI__builtin_ia32_loaddquqi128_mask:14738case X86::BI__builtin_ia32_loaddquqi256_mask:14739case X86::BI__builtin_ia32_loaddquqi512_mask:14740case X86::BI__builtin_ia32_loaddquhi128_mask:14741case X86::BI__builtin_ia32_loaddquhi256_mask:14742case X86::BI__builtin_ia32_loaddquhi512_mask:14743case X86::BI__builtin_ia32_loaddqusi128_mask:14744case X86::BI__builtin_ia32_loaddqusi256_mask:14745case X86::BI__builtin_ia32_loaddqusi512_mask:14746case X86::BI__builtin_ia32_loaddqudi128_mask:14747case X86::BI__builtin_ia32_loaddqudi256_mask:14748case X86::BI__builtin_ia32_loaddqudi512_mask:14749return EmitX86MaskedLoad(*this, Ops, Align(1));1475014751case X86::BI__builtin_ia32_loadsh128_mask:14752case X86::BI__builtin_ia32_loadss128_mask:14753case X86::BI__builtin_ia32_loadsd128_mask:14754return EmitX86MaskedLoad(*this, Ops, Align(1));1475514756case X86::BI__builtin_ia32_loadaps128_mask:14757case X86::BI__builtin_ia32_loadaps256_mask:14758case X86::BI__builtin_ia32_loadaps512_mask:14759case X86::BI__builtin_ia32_loadapd128_mask:14760case X86::BI__builtin_ia32_loadapd256_mask:14761case X86::BI__builtin_ia32_loadapd512_mask:14762case X86::BI__builtin_ia32_movdqa32load128_mask:14763case X86::BI__builtin_ia32_movdqa32load256_mask:14764case 
X86::BI__builtin_ia32_movdqa32load512_mask:14765case X86::BI__builtin_ia32_movdqa64load128_mask:14766case X86::BI__builtin_ia32_movdqa64load256_mask:14767case X86::BI__builtin_ia32_movdqa64load512_mask:14768return EmitX86MaskedLoad(14769*this, Ops,14770getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign());1477114772case X86::BI__builtin_ia32_expandloaddf128_mask:14773case X86::BI__builtin_ia32_expandloaddf256_mask:14774case X86::BI__builtin_ia32_expandloaddf512_mask:14775case X86::BI__builtin_ia32_expandloadsf128_mask:14776case X86::BI__builtin_ia32_expandloadsf256_mask:14777case X86::BI__builtin_ia32_expandloadsf512_mask:14778case X86::BI__builtin_ia32_expandloaddi128_mask:14779case X86::BI__builtin_ia32_expandloaddi256_mask:14780case X86::BI__builtin_ia32_expandloaddi512_mask:14781case X86::BI__builtin_ia32_expandloadsi128_mask:14782case X86::BI__builtin_ia32_expandloadsi256_mask:14783case X86::BI__builtin_ia32_expandloadsi512_mask:14784case X86::BI__builtin_ia32_expandloadhi128_mask:14785case X86::BI__builtin_ia32_expandloadhi256_mask:14786case X86::BI__builtin_ia32_expandloadhi512_mask:14787case X86::BI__builtin_ia32_expandloadqi128_mask:14788case X86::BI__builtin_ia32_expandloadqi256_mask:14789case X86::BI__builtin_ia32_expandloadqi512_mask:14790return EmitX86ExpandLoad(*this, Ops);1479114792case X86::BI__builtin_ia32_compressstoredf128_mask:14793case X86::BI__builtin_ia32_compressstoredf256_mask:14794case X86::BI__builtin_ia32_compressstoredf512_mask:14795case X86::BI__builtin_ia32_compressstoresf128_mask:14796case X86::BI__builtin_ia32_compressstoresf256_mask:14797case X86::BI__builtin_ia32_compressstoresf512_mask:14798case X86::BI__builtin_ia32_compressstoredi128_mask:14799case X86::BI__builtin_ia32_compressstoredi256_mask:14800case X86::BI__builtin_ia32_compressstoredi512_mask:14801case X86::BI__builtin_ia32_compressstoresi128_mask:14802case X86::BI__builtin_ia32_compressstoresi256_mask:14803case 
X86::BI__builtin_ia32_compressstoresi512_mask:14804case X86::BI__builtin_ia32_compressstorehi128_mask:14805case X86::BI__builtin_ia32_compressstorehi256_mask:14806case X86::BI__builtin_ia32_compressstorehi512_mask:14807case X86::BI__builtin_ia32_compressstoreqi128_mask:14808case X86::BI__builtin_ia32_compressstoreqi256_mask:14809case X86::BI__builtin_ia32_compressstoreqi512_mask:14810return EmitX86CompressStore(*this, Ops);1481114812case X86::BI__builtin_ia32_expanddf128_mask:14813case X86::BI__builtin_ia32_expanddf256_mask:14814case X86::BI__builtin_ia32_expanddf512_mask:14815case X86::BI__builtin_ia32_expandsf128_mask:14816case X86::BI__builtin_ia32_expandsf256_mask:14817case X86::BI__builtin_ia32_expandsf512_mask:14818case X86::BI__builtin_ia32_expanddi128_mask:14819case X86::BI__builtin_ia32_expanddi256_mask:14820case X86::BI__builtin_ia32_expanddi512_mask:14821case X86::BI__builtin_ia32_expandsi128_mask:14822case X86::BI__builtin_ia32_expandsi256_mask:14823case X86::BI__builtin_ia32_expandsi512_mask:14824case X86::BI__builtin_ia32_expandhi128_mask:14825case X86::BI__builtin_ia32_expandhi256_mask:14826case X86::BI__builtin_ia32_expandhi512_mask:14827case X86::BI__builtin_ia32_expandqi128_mask:14828case X86::BI__builtin_ia32_expandqi256_mask:14829case X86::BI__builtin_ia32_expandqi512_mask:14830return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);1483114832case X86::BI__builtin_ia32_compressdf128_mask:14833case X86::BI__builtin_ia32_compressdf256_mask:14834case X86::BI__builtin_ia32_compressdf512_mask:14835case X86::BI__builtin_ia32_compresssf128_mask:14836case X86::BI__builtin_ia32_compresssf256_mask:14837case X86::BI__builtin_ia32_compresssf512_mask:14838case X86::BI__builtin_ia32_compressdi128_mask:14839case X86::BI__builtin_ia32_compressdi256_mask:14840case X86::BI__builtin_ia32_compressdi512_mask:14841case X86::BI__builtin_ia32_compresssi128_mask:14842case X86::BI__builtin_ia32_compresssi256_mask:14843case 
X86::BI__builtin_ia32_compresssi512_mask:14844case X86::BI__builtin_ia32_compresshi128_mask:14845case X86::BI__builtin_ia32_compresshi256_mask:14846case X86::BI__builtin_ia32_compresshi512_mask:14847case X86::BI__builtin_ia32_compressqi128_mask:14848case X86::BI__builtin_ia32_compressqi256_mask:14849case X86::BI__builtin_ia32_compressqi512_mask:14850return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);1485114852case X86::BI__builtin_ia32_gather3div2df:14853case X86::BI__builtin_ia32_gather3div2di:14854case X86::BI__builtin_ia32_gather3div4df:14855case X86::BI__builtin_ia32_gather3div4di:14856case X86::BI__builtin_ia32_gather3div4sf:14857case X86::BI__builtin_ia32_gather3div4si:14858case X86::BI__builtin_ia32_gather3div8sf:14859case X86::BI__builtin_ia32_gather3div8si:14860case X86::BI__builtin_ia32_gather3siv2df:14861case X86::BI__builtin_ia32_gather3siv2di:14862case X86::BI__builtin_ia32_gather3siv4df:14863case X86::BI__builtin_ia32_gather3siv4di:14864case X86::BI__builtin_ia32_gather3siv4sf:14865case X86::BI__builtin_ia32_gather3siv4si:14866case X86::BI__builtin_ia32_gather3siv8sf:14867case X86::BI__builtin_ia32_gather3siv8si:14868case X86::BI__builtin_ia32_gathersiv8df:14869case X86::BI__builtin_ia32_gathersiv16sf:14870case X86::BI__builtin_ia32_gatherdiv8df:14871case X86::BI__builtin_ia32_gatherdiv16sf:14872case X86::BI__builtin_ia32_gathersiv8di:14873case X86::BI__builtin_ia32_gathersiv16si:14874case X86::BI__builtin_ia32_gatherdiv8di:14875case X86::BI__builtin_ia32_gatherdiv16si: {14876Intrinsic::ID IID;14877switch (BuiltinID) {14878default: llvm_unreachable("Unexpected builtin");14879case X86::BI__builtin_ia32_gather3div2df:14880IID = Intrinsic::x86_avx512_mask_gather3div2_df;14881break;14882case X86::BI__builtin_ia32_gather3div2di:14883IID = Intrinsic::x86_avx512_mask_gather3div2_di;14884break;14885case X86::BI__builtin_ia32_gather3div4df:14886IID = Intrinsic::x86_avx512_mask_gather3div4_df;14887break;14888case 
X86::BI__builtin_ia32_gather3div4di:14889IID = Intrinsic::x86_avx512_mask_gather3div4_di;14890break;14891case X86::BI__builtin_ia32_gather3div4sf:14892IID = Intrinsic::x86_avx512_mask_gather3div4_sf;14893break;14894case X86::BI__builtin_ia32_gather3div4si:14895IID = Intrinsic::x86_avx512_mask_gather3div4_si;14896break;14897case X86::BI__builtin_ia32_gather3div8sf:14898IID = Intrinsic::x86_avx512_mask_gather3div8_sf;14899break;14900case X86::BI__builtin_ia32_gather3div8si:14901IID = Intrinsic::x86_avx512_mask_gather3div8_si;14902break;14903case X86::BI__builtin_ia32_gather3siv2df:14904IID = Intrinsic::x86_avx512_mask_gather3siv2_df;14905break;14906case X86::BI__builtin_ia32_gather3siv2di:14907IID = Intrinsic::x86_avx512_mask_gather3siv2_di;14908break;14909case X86::BI__builtin_ia32_gather3siv4df:14910IID = Intrinsic::x86_avx512_mask_gather3siv4_df;14911break;14912case X86::BI__builtin_ia32_gather3siv4di:14913IID = Intrinsic::x86_avx512_mask_gather3siv4_di;14914break;14915case X86::BI__builtin_ia32_gather3siv4sf:14916IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;14917break;14918case X86::BI__builtin_ia32_gather3siv4si:14919IID = Intrinsic::x86_avx512_mask_gather3siv4_si;14920break;14921case X86::BI__builtin_ia32_gather3siv8sf:14922IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;14923break;14924case X86::BI__builtin_ia32_gather3siv8si:14925IID = Intrinsic::x86_avx512_mask_gather3siv8_si;14926break;14927case X86::BI__builtin_ia32_gathersiv8df:14928IID = Intrinsic::x86_avx512_mask_gather_dpd_512;14929break;14930case X86::BI__builtin_ia32_gathersiv16sf:14931IID = Intrinsic::x86_avx512_mask_gather_dps_512;14932break;14933case X86::BI__builtin_ia32_gatherdiv8df:14934IID = Intrinsic::x86_avx512_mask_gather_qpd_512;14935break;14936case X86::BI__builtin_ia32_gatherdiv16sf:14937IID = Intrinsic::x86_avx512_mask_gather_qps_512;14938break;14939case X86::BI__builtin_ia32_gathersiv8di:14940IID = Intrinsic::x86_avx512_mask_gather_dpq_512;14941break;14942case 
X86::BI__builtin_ia32_gathersiv16si:14943IID = Intrinsic::x86_avx512_mask_gather_dpi_512;14944break;14945case X86::BI__builtin_ia32_gatherdiv8di:14946IID = Intrinsic::x86_avx512_mask_gather_qpq_512;14947break;14948case X86::BI__builtin_ia32_gatherdiv16si:14949IID = Intrinsic::x86_avx512_mask_gather_qpi_512;14950break;14951}1495214953unsigned MinElts = std::min(14954cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements(),14955cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements());14956Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);14957Function *Intr = CGM.getIntrinsic(IID);14958return Builder.CreateCall(Intr, Ops);14959}1496014961case X86::BI__builtin_ia32_scattersiv8df:14962case X86::BI__builtin_ia32_scattersiv16sf:14963case X86::BI__builtin_ia32_scatterdiv8df:14964case X86::BI__builtin_ia32_scatterdiv16sf:14965case X86::BI__builtin_ia32_scattersiv8di:14966case X86::BI__builtin_ia32_scattersiv16si:14967case X86::BI__builtin_ia32_scatterdiv8di:14968case X86::BI__builtin_ia32_scatterdiv16si:14969case X86::BI__builtin_ia32_scatterdiv2df:14970case X86::BI__builtin_ia32_scatterdiv2di:14971case X86::BI__builtin_ia32_scatterdiv4df:14972case X86::BI__builtin_ia32_scatterdiv4di:14973case X86::BI__builtin_ia32_scatterdiv4sf:14974case X86::BI__builtin_ia32_scatterdiv4si:14975case X86::BI__builtin_ia32_scatterdiv8sf:14976case X86::BI__builtin_ia32_scatterdiv8si:14977case X86::BI__builtin_ia32_scattersiv2df:14978case X86::BI__builtin_ia32_scattersiv2di:14979case X86::BI__builtin_ia32_scattersiv4df:14980case X86::BI__builtin_ia32_scattersiv4di:14981case X86::BI__builtin_ia32_scattersiv4sf:14982case X86::BI__builtin_ia32_scattersiv4si:14983case X86::BI__builtin_ia32_scattersiv8sf:14984case X86::BI__builtin_ia32_scattersiv8si: {14985Intrinsic::ID IID;14986switch (BuiltinID) {14987default: llvm_unreachable("Unexpected builtin");14988case X86::BI__builtin_ia32_scattersiv8df:14989IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;14990break;14991case 
X86::BI__builtin_ia32_scattersiv16sf:14992IID = Intrinsic::x86_avx512_mask_scatter_dps_512;14993break;14994case X86::BI__builtin_ia32_scatterdiv8df:14995IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;14996break;14997case X86::BI__builtin_ia32_scatterdiv16sf:14998IID = Intrinsic::x86_avx512_mask_scatter_qps_512;14999break;15000case X86::BI__builtin_ia32_scattersiv8di:15001IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;15002break;15003case X86::BI__builtin_ia32_scattersiv16si:15004IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;15005break;15006case X86::BI__builtin_ia32_scatterdiv8di:15007IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;15008break;15009case X86::BI__builtin_ia32_scatterdiv16si:15010IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;15011break;15012case X86::BI__builtin_ia32_scatterdiv2df:15013IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;15014break;15015case X86::BI__builtin_ia32_scatterdiv2di:15016IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;15017break;15018case X86::BI__builtin_ia32_scatterdiv4df:15019IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;15020break;15021case X86::BI__builtin_ia32_scatterdiv4di:15022IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;15023break;15024case X86::BI__builtin_ia32_scatterdiv4sf:15025IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;15026break;15027case X86::BI__builtin_ia32_scatterdiv4si:15028IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;15029break;15030case X86::BI__builtin_ia32_scatterdiv8sf:15031IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;15032break;15033case X86::BI__builtin_ia32_scatterdiv8si:15034IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;15035break;15036case X86::BI__builtin_ia32_scattersiv2df:15037IID = Intrinsic::x86_avx512_mask_scattersiv2_df;15038break;15039case X86::BI__builtin_ia32_scattersiv2di:15040IID = Intrinsic::x86_avx512_mask_scattersiv2_di;15041break;15042case X86::BI__builtin_ia32_scattersiv4df:15043IID = Intrinsic::x86_avx512_mask_scattersiv4_df;15044break;15045case 
X86::BI__builtin_ia32_scattersiv4di:15046IID = Intrinsic::x86_avx512_mask_scattersiv4_di;15047break;15048case X86::BI__builtin_ia32_scattersiv4sf:15049IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;15050break;15051case X86::BI__builtin_ia32_scattersiv4si:15052IID = Intrinsic::x86_avx512_mask_scattersiv4_si;15053break;15054case X86::BI__builtin_ia32_scattersiv8sf:15055IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;15056break;15057case X86::BI__builtin_ia32_scattersiv8si:15058IID = Intrinsic::x86_avx512_mask_scattersiv8_si;15059break;15060}1506115062unsigned MinElts = std::min(15063cast<llvm::FixedVectorType>(Ops[2]->getType())->getNumElements(),15064cast<llvm::FixedVectorType>(Ops[3]->getType())->getNumElements());15065Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);15066Function *Intr = CGM.getIntrinsic(IID);15067return Builder.CreateCall(Intr, Ops);15068}1506915070case X86::BI__builtin_ia32_vextractf128_pd256:15071case X86::BI__builtin_ia32_vextractf128_ps256:15072case X86::BI__builtin_ia32_vextractf128_si256:15073case X86::BI__builtin_ia32_extract128i256:15074case X86::BI__builtin_ia32_extractf64x4_mask:15075case X86::BI__builtin_ia32_extractf32x4_mask:15076case X86::BI__builtin_ia32_extracti64x4_mask:15077case X86::BI__builtin_ia32_extracti32x4_mask:15078case X86::BI__builtin_ia32_extractf32x8_mask:15079case X86::BI__builtin_ia32_extracti32x8_mask:15080case X86::BI__builtin_ia32_extractf32x4_256_mask:15081case X86::BI__builtin_ia32_extracti32x4_256_mask:15082case X86::BI__builtin_ia32_extractf64x2_256_mask:15083case X86::BI__builtin_ia32_extracti64x2_256_mask:15084case X86::BI__builtin_ia32_extractf64x2_512_mask:15085case X86::BI__builtin_ia32_extracti64x2_512_mask: {15086auto *DstTy = cast<llvm::FixedVectorType>(ConvertType(E->getType()));15087unsigned NumElts = DstTy->getNumElements();15088unsigned SrcNumElts =15089cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();15090unsigned SubVectors = SrcNumElts / NumElts;15091unsigned Index = 
cast<ConstantInt>(Ops[1])->getZExtValue();15092assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");15093Index &= SubVectors - 1; // Remove any extra bits.15094Index *= NumElts;1509515096int Indices[16];15097for (unsigned i = 0; i != NumElts; ++i)15098Indices[i] = i + Index;1509915100Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),15101"extract");1510215103if (Ops.size() == 4)15104Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);1510515106return Res;15107}15108case X86::BI__builtin_ia32_vinsertf128_pd256:15109case X86::BI__builtin_ia32_vinsertf128_ps256:15110case X86::BI__builtin_ia32_vinsertf128_si256:15111case X86::BI__builtin_ia32_insert128i256:15112case X86::BI__builtin_ia32_insertf64x4:15113case X86::BI__builtin_ia32_insertf32x4:15114case X86::BI__builtin_ia32_inserti64x4:15115case X86::BI__builtin_ia32_inserti32x4:15116case X86::BI__builtin_ia32_insertf32x8:15117case X86::BI__builtin_ia32_inserti32x8:15118case X86::BI__builtin_ia32_insertf32x4_256:15119case X86::BI__builtin_ia32_inserti32x4_256:15120case X86::BI__builtin_ia32_insertf64x2_256:15121case X86::BI__builtin_ia32_inserti64x2_256:15122case X86::BI__builtin_ia32_insertf64x2_512:15123case X86::BI__builtin_ia32_inserti64x2_512: {15124unsigned DstNumElts =15125cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();15126unsigned SrcNumElts =15127cast<llvm::FixedVectorType>(Ops[1]->getType())->getNumElements();15128unsigned SubVectors = DstNumElts / SrcNumElts;15129unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();15130assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");15131Index &= SubVectors - 1; // Remove any extra bits.15132Index *= SrcNumElts;1513315134int Indices[16];15135for (unsigned i = 0; i != DstNumElts; ++i)15136Indices[i] = (i >= SrcNumElts) ? 
SrcNumElts + (i % SrcNumElts) : i;1513715138Value *Op1 = Builder.CreateShuffleVector(15139Ops[1], ArrayRef(Indices, DstNumElts), "widen");1514015141for (unsigned i = 0; i != DstNumElts; ++i) {15142if (i >= Index && i < (Index + SrcNumElts))15143Indices[i] = (i - Index) + DstNumElts;15144else15145Indices[i] = i;15146}1514715148return Builder.CreateShuffleVector(Ops[0], Op1,15149ArrayRef(Indices, DstNumElts), "insert");15150}15151case X86::BI__builtin_ia32_pmovqd512_mask:15152case X86::BI__builtin_ia32_pmovwb512_mask: {15153Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());15154return EmitX86Select(*this, Ops[2], Res, Ops[1]);15155}15156case X86::BI__builtin_ia32_pmovdb512_mask:15157case X86::BI__builtin_ia32_pmovdw512_mask:15158case X86::BI__builtin_ia32_pmovqw512_mask: {15159if (const auto *C = dyn_cast<Constant>(Ops[2]))15160if (C->isAllOnesValue())15161return Builder.CreateTrunc(Ops[0], Ops[1]->getType());1516215163Intrinsic::ID IID;15164switch (BuiltinID) {15165default: llvm_unreachable("Unsupported intrinsic!");15166case X86::BI__builtin_ia32_pmovdb512_mask:15167IID = Intrinsic::x86_avx512_mask_pmov_db_512;15168break;15169case X86::BI__builtin_ia32_pmovdw512_mask:15170IID = Intrinsic::x86_avx512_mask_pmov_dw_512;15171break;15172case X86::BI__builtin_ia32_pmovqw512_mask:15173IID = Intrinsic::x86_avx512_mask_pmov_qw_512;15174break;15175}1517615177Function *Intr = CGM.getIntrinsic(IID);15178return Builder.CreateCall(Intr, Ops);15179}15180case X86::BI__builtin_ia32_pblendw128:15181case X86::BI__builtin_ia32_blendpd:15182case X86::BI__builtin_ia32_blendps:15183case X86::BI__builtin_ia32_blendpd256:15184case X86::BI__builtin_ia32_blendps256:15185case X86::BI__builtin_ia32_pblendw256:15186case X86::BI__builtin_ia32_pblendd128:15187case X86::BI__builtin_ia32_pblendd256: {15188unsigned NumElts =15189cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();15190unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();1519115192int 
Indices[16];15193// If there are more than 8 elements, the immediate is used twice so make15194// sure we handle that.15195for (unsigned i = 0; i != NumElts; ++i)15196Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;1519715198return Builder.CreateShuffleVector(Ops[0], Ops[1],15199ArrayRef(Indices, NumElts), "blend");15200}15201case X86::BI__builtin_ia32_pshuflw:15202case X86::BI__builtin_ia32_pshuflw256:15203case X86::BI__builtin_ia32_pshuflw512: {15204uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();15205auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());15206unsigned NumElts = Ty->getNumElements();1520715208// Splat the 8-bits of immediate 4 times to help the loop wrap around.15209Imm = (Imm & 0xff) * 0x01010101;1521015211int Indices[32];15212for (unsigned l = 0; l != NumElts; l += 8) {15213for (unsigned i = 0; i != 4; ++i) {15214Indices[l + i] = l + (Imm & 3);15215Imm >>= 2;15216}15217for (unsigned i = 4; i != 8; ++i)15218Indices[l + i] = l + i;15219}1522015221return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),15222"pshuflw");15223}15224case X86::BI__builtin_ia32_pshufhw:15225case X86::BI__builtin_ia32_pshufhw256:15226case X86::BI__builtin_ia32_pshufhw512: {15227uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();15228auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());15229unsigned NumElts = Ty->getNumElements();1523015231// Splat the 8-bits of immediate 4 times to help the loop wrap around.15232Imm = (Imm & 0xff) * 0x01010101;1523315234int Indices[32];15235for (unsigned l = 0; l != NumElts; l += 8) {15236for (unsigned i = 0; i != 4; ++i)15237Indices[l + i] = l + i;15238for (unsigned i = 4; i != 8; ++i) {15239Indices[l + i] = l + 4 + (Imm & 3);15240Imm >>= 2;15241}15242}1524315244return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),15245"pshufhw");15246}15247case X86::BI__builtin_ia32_pshufd:15248case X86::BI__builtin_ia32_pshufd256:15249case 
X86::BI__builtin_ia32_pshufd512:15250case X86::BI__builtin_ia32_vpermilpd:15251case X86::BI__builtin_ia32_vpermilps:15252case X86::BI__builtin_ia32_vpermilpd256:15253case X86::BI__builtin_ia32_vpermilps256:15254case X86::BI__builtin_ia32_vpermilpd512:15255case X86::BI__builtin_ia32_vpermilps512: {15256uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();15257auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());15258unsigned NumElts = Ty->getNumElements();15259unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;15260unsigned NumLaneElts = NumElts / NumLanes;1526115262// Splat the 8-bits of immediate 4 times to help the loop wrap around.15263Imm = (Imm & 0xff) * 0x01010101;1526415265int Indices[16];15266for (unsigned l = 0; l != NumElts; l += NumLaneElts) {15267for (unsigned i = 0; i != NumLaneElts; ++i) {15268Indices[i + l] = (Imm % NumLaneElts) + l;15269Imm /= NumLaneElts;15270}15271}1527215273return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),15274"permil");15275}15276case X86::BI__builtin_ia32_shufpd:15277case X86::BI__builtin_ia32_shufpd256:15278case X86::BI__builtin_ia32_shufpd512:15279case X86::BI__builtin_ia32_shufps:15280case X86::BI__builtin_ia32_shufps256:15281case X86::BI__builtin_ia32_shufps512: {15282uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();15283auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());15284unsigned NumElts = Ty->getNumElements();15285unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;15286unsigned NumLaneElts = NumElts / NumLanes;1528715288// Splat the 8-bits of immediate 4 times to help the loop wrap around.15289Imm = (Imm & 0xff) * 0x01010101;1529015291int Indices[16];15292for (unsigned l = 0; l != NumElts; l += NumLaneElts) {15293for (unsigned i = 0; i != NumLaneElts; ++i) {15294unsigned Index = Imm % NumLaneElts;15295Imm /= NumLaneElts;15296if (i >= (NumLaneElts / 2))15297Index += NumElts;15298Indices[l + i] = l + Index;15299}15300}1530115302return 
Builder.CreateShuffleVector(Ops[0], Ops[1],15303ArrayRef(Indices, NumElts), "shufp");15304}15305case X86::BI__builtin_ia32_permdi256:15306case X86::BI__builtin_ia32_permdf256:15307case X86::BI__builtin_ia32_permdi512:15308case X86::BI__builtin_ia32_permdf512: {15309unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();15310auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());15311unsigned NumElts = Ty->getNumElements();1531215313// These intrinsics operate on 256-bit lanes of four 64-bit elements.15314int Indices[8];15315for (unsigned l = 0; l != NumElts; l += 4)15316for (unsigned i = 0; i != 4; ++i)15317Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);1531815319return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts),15320"perm");15321}15322case X86::BI__builtin_ia32_palignr128:15323case X86::BI__builtin_ia32_palignr256:15324case X86::BI__builtin_ia32_palignr512: {15325unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;1532615327unsigned NumElts =15328cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();15329assert(NumElts % 16 == 0);1533015331// If palignr is shifting the pair of vectors more than the size of two15332// lanes, emit zero.15333if (ShiftVal >= 32)15334return llvm::Constant::getNullValue(ConvertType(E->getType()));1533515336// If palignr is shifting the pair of input vectors more than one lane,15337// but less than two lanes, convert to shifting in zeroes.15338if (ShiftVal > 16) {15339ShiftVal -= 16;15340Ops[1] = Ops[0];15341Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());15342}1534315344int Indices[64];15345// 256-bit palignr operates on 128-bit lanes so we need to handle that15346for (unsigned l = 0; l != NumElts; l += 16) {15347for (unsigned i = 0; i != 16; ++i) {15348unsigned Idx = ShiftVal + i;15349if (Idx >= 16)15350Idx += NumElts - 16; // End of lane, switch operand.15351Indices[l + i] = Idx + l;15352}15353}1535415355return Builder.CreateShuffleVector(Ops[1], 
Ops[0],15356ArrayRef(Indices, NumElts), "palignr");15357}15358case X86::BI__builtin_ia32_alignd128:15359case X86::BI__builtin_ia32_alignd256:15360case X86::BI__builtin_ia32_alignd512:15361case X86::BI__builtin_ia32_alignq128:15362case X86::BI__builtin_ia32_alignq256:15363case X86::BI__builtin_ia32_alignq512: {15364unsigned NumElts =15365cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();15366unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;1536715368// Mask the shift amount to width of a vector.15369ShiftVal &= NumElts - 1;1537015371int Indices[16];15372for (unsigned i = 0; i != NumElts; ++i)15373Indices[i] = i + ShiftVal;1537415375return Builder.CreateShuffleVector(Ops[1], Ops[0],15376ArrayRef(Indices, NumElts), "valign");15377}15378case X86::BI__builtin_ia32_shuf_f32x4_256:15379case X86::BI__builtin_ia32_shuf_f64x2_256:15380case X86::BI__builtin_ia32_shuf_i32x4_256:15381case X86::BI__builtin_ia32_shuf_i64x2_256:15382case X86::BI__builtin_ia32_shuf_f32x4:15383case X86::BI__builtin_ia32_shuf_f64x2:15384case X86::BI__builtin_ia32_shuf_i32x4:15385case X86::BI__builtin_ia32_shuf_i64x2: {15386unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();15387auto *Ty = cast<llvm::FixedVectorType>(Ops[0]->getType());15388unsigned NumElts = Ty->getNumElements();15389unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 
4 : 2;15390unsigned NumLaneElts = NumElts / NumLanes;1539115392int Indices[16];15393for (unsigned l = 0; l != NumElts; l += NumLaneElts) {15394unsigned Index = (Imm % NumLanes) * NumLaneElts;15395Imm /= NumLanes; // Discard the bits we just used.15396if (l >= (NumElts / 2))15397Index += NumElts; // Switch to other source.15398for (unsigned i = 0; i != NumLaneElts; ++i) {15399Indices[l + i] = Index + i;15400}15401}1540215403return Builder.CreateShuffleVector(Ops[0], Ops[1],15404ArrayRef(Indices, NumElts), "shuf");15405}1540615407case X86::BI__builtin_ia32_vperm2f128_pd256:15408case X86::BI__builtin_ia32_vperm2f128_ps256:15409case X86::BI__builtin_ia32_vperm2f128_si256:15410case X86::BI__builtin_ia32_permti256: {15411unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();15412unsigned NumElts =15413cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();1541415415// This takes a very simple approach since there are two lanes and a15416// shuffle can have 2 inputs. So we reserve the first input for the first15417// lane and the second input for the second lane. 
This may result in15418// duplicate sources, but this can be dealt with in the backend.1541915420Value *OutOps[2];15421int Indices[8];15422for (unsigned l = 0; l != 2; ++l) {15423// Determine the source for this lane.15424if (Imm & (1 << ((l * 4) + 3)))15425OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());15426else if (Imm & (1 << ((l * 4) + 1)))15427OutOps[l] = Ops[1];15428else15429OutOps[l] = Ops[0];1543015431for (unsigned i = 0; i != NumElts/2; ++i) {15432// Start with ith element of the source for this lane.15433unsigned Idx = (l * NumElts) + i;15434// If bit 0 of the immediate half is set, switch to the high half of15435// the source.15436if (Imm & (1 << (l * 4)))15437Idx += NumElts/2;15438Indices[(l * (NumElts/2)) + i] = Idx;15439}15440}1544115442return Builder.CreateShuffleVector(OutOps[0], OutOps[1],15443ArrayRef(Indices, NumElts), "vperm");15444}1544515446case X86::BI__builtin_ia32_pslldqi128_byteshift:15447case X86::BI__builtin_ia32_pslldqi256_byteshift:15448case X86::BI__builtin_ia32_pslldqi512_byteshift: {15449unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;15450auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());15451// Builtin type is vXi64 so multiply by 8 to get bytes.15452unsigned NumElts = ResultType->getNumElements() * 8;1545315454// If pslldq is shifting the vector more than 15 bytes, emit zero.15455if (ShiftVal >= 16)15456return llvm::Constant::getNullValue(ResultType);1545715458int Indices[64];15459// 256/512-bit pslldq operates on 128-bit lanes so we need to handle that15460for (unsigned l = 0; l != NumElts; l += 16) {15461for (unsigned i = 0; i != 16; ++i) {15462unsigned Idx = NumElts + i - ShiftVal;15463if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.15464Indices[l + i] = Idx + l;15465}15466}1546715468auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);15469Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");15470Value *Zero = 
llvm::Constant::getNullValue(VecTy);15471Value *SV = Builder.CreateShuffleVector(15472Zero, Cast, ArrayRef(Indices, NumElts), "pslldq");15473return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");15474}15475case X86::BI__builtin_ia32_psrldqi128_byteshift:15476case X86::BI__builtin_ia32_psrldqi256_byteshift:15477case X86::BI__builtin_ia32_psrldqi512_byteshift: {15478unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;15479auto *ResultType = cast<llvm::FixedVectorType>(Ops[0]->getType());15480// Builtin type is vXi64 so multiply by 8 to get bytes.15481unsigned NumElts = ResultType->getNumElements() * 8;1548215483// If psrldq is shifting the vector more than 15 bytes, emit zero.15484if (ShiftVal >= 16)15485return llvm::Constant::getNullValue(ResultType);1548615487int Indices[64];15488// 256/512-bit psrldq operates on 128-bit lanes so we need to handle that15489for (unsigned l = 0; l != NumElts; l += 16) {15490for (unsigned i = 0; i != 16; ++i) {15491unsigned Idx = i + ShiftVal;15492if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.15493Indices[l + i] = Idx + l;15494}15495}1549615497auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);15498Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");15499Value *Zero = llvm::Constant::getNullValue(VecTy);15500Value *SV = Builder.CreateShuffleVector(15501Cast, Zero, ArrayRef(Indices, NumElts), "psrldq");15502return Builder.CreateBitCast(SV, ResultType, "cast");15503}15504case X86::BI__builtin_ia32_kshiftliqi:15505case X86::BI__builtin_ia32_kshiftlihi:15506case X86::BI__builtin_ia32_kshiftlisi:15507case X86::BI__builtin_ia32_kshiftlidi: {15508unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;15509unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();1551015511if (ShiftVal >= NumElts)15512return llvm::Constant::getNullValue(Ops[0]->getType());1551315514Value *In = getMaskVecValue(*this, Ops[0], NumElts);1551515516int Indices[64];15517for 
(unsigned i = 0; i != NumElts; ++i)15518Indices[i] = NumElts + i - ShiftVal;1551915520Value *Zero = llvm::Constant::getNullValue(In->getType());15521Value *SV = Builder.CreateShuffleVector(15522Zero, In, ArrayRef(Indices, NumElts), "kshiftl");15523return Builder.CreateBitCast(SV, Ops[0]->getType());15524}15525case X86::BI__builtin_ia32_kshiftriqi:15526case X86::BI__builtin_ia32_kshiftrihi:15527case X86::BI__builtin_ia32_kshiftrisi:15528case X86::BI__builtin_ia32_kshiftridi: {15529unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;15530unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();1553115532if (ShiftVal >= NumElts)15533return llvm::Constant::getNullValue(Ops[0]->getType());1553415535Value *In = getMaskVecValue(*this, Ops[0], NumElts);1553615537int Indices[64];15538for (unsigned i = 0; i != NumElts; ++i)15539Indices[i] = i + ShiftVal;1554015541Value *Zero = llvm::Constant::getNullValue(In->getType());15542Value *SV = Builder.CreateShuffleVector(15543In, Zero, ArrayRef(Indices, NumElts), "kshiftr");15544return Builder.CreateBitCast(SV, Ops[0]->getType());15545}15546case X86::BI__builtin_ia32_movnti:15547case X86::BI__builtin_ia32_movnti64:15548case X86::BI__builtin_ia32_movntsd:15549case X86::BI__builtin_ia32_movntss: {15550llvm::MDNode *Node = llvm::MDNode::get(15551getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));1555215553Value *Ptr = Ops[0];15554Value *Src = Ops[1];1555515556// Extract the 0'th element of the source vector.15557if (BuiltinID == X86::BI__builtin_ia32_movntsd ||15558BuiltinID == X86::BI__builtin_ia32_movntss)15559Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");1556015561// Unaligned nontemporal store of the scalar value.15562StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr);15563SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node);15564SI->setAlignment(llvm::Align(1));15565return SI;15566}15567// Rotate is a special case of funnel shift - 1st 2 args are the 
same.15568case X86::BI__builtin_ia32_vprotb:15569case X86::BI__builtin_ia32_vprotw:15570case X86::BI__builtin_ia32_vprotd:15571case X86::BI__builtin_ia32_vprotq:15572case X86::BI__builtin_ia32_vprotbi:15573case X86::BI__builtin_ia32_vprotwi:15574case X86::BI__builtin_ia32_vprotdi:15575case X86::BI__builtin_ia32_vprotqi:15576case X86::BI__builtin_ia32_prold128:15577case X86::BI__builtin_ia32_prold256:15578case X86::BI__builtin_ia32_prold512:15579case X86::BI__builtin_ia32_prolq128:15580case X86::BI__builtin_ia32_prolq256:15581case X86::BI__builtin_ia32_prolq512:15582case X86::BI__builtin_ia32_prolvd128:15583case X86::BI__builtin_ia32_prolvd256:15584case X86::BI__builtin_ia32_prolvd512:15585case X86::BI__builtin_ia32_prolvq128:15586case X86::BI__builtin_ia32_prolvq256:15587case X86::BI__builtin_ia32_prolvq512:15588return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);15589case X86::BI__builtin_ia32_prord128:15590case X86::BI__builtin_ia32_prord256:15591case X86::BI__builtin_ia32_prord512:15592case X86::BI__builtin_ia32_prorq128:15593case X86::BI__builtin_ia32_prorq256:15594case X86::BI__builtin_ia32_prorq512:15595case X86::BI__builtin_ia32_prorvd128:15596case X86::BI__builtin_ia32_prorvd256:15597case X86::BI__builtin_ia32_prorvd512:15598case X86::BI__builtin_ia32_prorvq128:15599case X86::BI__builtin_ia32_prorvq256:15600case X86::BI__builtin_ia32_prorvq512:15601return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);15602case X86::BI__builtin_ia32_selectb_128:15603case X86::BI__builtin_ia32_selectb_256:15604case X86::BI__builtin_ia32_selectb_512:15605case X86::BI__builtin_ia32_selectw_128:15606case X86::BI__builtin_ia32_selectw_256:15607case X86::BI__builtin_ia32_selectw_512:15608case X86::BI__builtin_ia32_selectd_128:15609case X86::BI__builtin_ia32_selectd_256:15610case X86::BI__builtin_ia32_selectd_512:15611case X86::BI__builtin_ia32_selectq_128:15612case X86::BI__builtin_ia32_selectq_256:15613case X86::BI__builtin_ia32_selectq_512:15614case 
X86::BI__builtin_ia32_selectph_128:15615case X86::BI__builtin_ia32_selectph_256:15616case X86::BI__builtin_ia32_selectph_512:15617case X86::BI__builtin_ia32_selectpbf_128:15618case X86::BI__builtin_ia32_selectpbf_256:15619case X86::BI__builtin_ia32_selectpbf_512:15620case X86::BI__builtin_ia32_selectps_128:15621case X86::BI__builtin_ia32_selectps_256:15622case X86::BI__builtin_ia32_selectps_512:15623case X86::BI__builtin_ia32_selectpd_128:15624case X86::BI__builtin_ia32_selectpd_256:15625case X86::BI__builtin_ia32_selectpd_512:15626return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);15627case X86::BI__builtin_ia32_selectsh_128:15628case X86::BI__builtin_ia32_selectsbf_128:15629case X86::BI__builtin_ia32_selectss_128:15630case X86::BI__builtin_ia32_selectsd_128: {15631Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);15632Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);15633A = EmitX86ScalarSelect(*this, Ops[0], A, B);15634return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);15635}15636case X86::BI__builtin_ia32_cmpb128_mask:15637case X86::BI__builtin_ia32_cmpb256_mask:15638case X86::BI__builtin_ia32_cmpb512_mask:15639case X86::BI__builtin_ia32_cmpw128_mask:15640case X86::BI__builtin_ia32_cmpw256_mask:15641case X86::BI__builtin_ia32_cmpw512_mask:15642case X86::BI__builtin_ia32_cmpd128_mask:15643case X86::BI__builtin_ia32_cmpd256_mask:15644case X86::BI__builtin_ia32_cmpd512_mask:15645case X86::BI__builtin_ia32_cmpq128_mask:15646case X86::BI__builtin_ia32_cmpq256_mask:15647case X86::BI__builtin_ia32_cmpq512_mask: {15648unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;15649return EmitX86MaskedCompare(*this, CC, true, Ops);15650}15651case X86::BI__builtin_ia32_ucmpb128_mask:15652case X86::BI__builtin_ia32_ucmpb256_mask:15653case X86::BI__builtin_ia32_ucmpb512_mask:15654case X86::BI__builtin_ia32_ucmpw128_mask:15655case X86::BI__builtin_ia32_ucmpw256_mask:15656case X86::BI__builtin_ia32_ucmpw512_mask:15657case 
X86::BI__builtin_ia32_ucmpd128_mask:15658case X86::BI__builtin_ia32_ucmpd256_mask:15659case X86::BI__builtin_ia32_ucmpd512_mask:15660case X86::BI__builtin_ia32_ucmpq128_mask:15661case X86::BI__builtin_ia32_ucmpq256_mask:15662case X86::BI__builtin_ia32_ucmpq512_mask: {15663unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;15664return EmitX86MaskedCompare(*this, CC, false, Ops);15665}15666case X86::BI__builtin_ia32_vpcomb:15667case X86::BI__builtin_ia32_vpcomw:15668case X86::BI__builtin_ia32_vpcomd:15669case X86::BI__builtin_ia32_vpcomq:15670return EmitX86vpcom(*this, Ops, true);15671case X86::BI__builtin_ia32_vpcomub:15672case X86::BI__builtin_ia32_vpcomuw:15673case X86::BI__builtin_ia32_vpcomud:15674case X86::BI__builtin_ia32_vpcomuq:15675return EmitX86vpcom(*this, Ops, false);1567615677case X86::BI__builtin_ia32_kortestcqi:15678case X86::BI__builtin_ia32_kortestchi:15679case X86::BI__builtin_ia32_kortestcsi:15680case X86::BI__builtin_ia32_kortestcdi: {15681Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);15682Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());15683Value *Cmp = Builder.CreateICmpEQ(Or, C);15684return Builder.CreateZExt(Cmp, ConvertType(E->getType()));15685}15686case X86::BI__builtin_ia32_kortestzqi:15687case X86::BI__builtin_ia32_kortestzhi:15688case X86::BI__builtin_ia32_kortestzsi:15689case X86::BI__builtin_ia32_kortestzdi: {15690Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);15691Value *C = llvm::Constant::getNullValue(Ops[0]->getType());15692Value *Cmp = Builder.CreateICmpEQ(Or, C);15693return Builder.CreateZExt(Cmp, ConvertType(E->getType()));15694}1569515696case X86::BI__builtin_ia32_ktestcqi:15697case X86::BI__builtin_ia32_ktestzqi:15698case X86::BI__builtin_ia32_ktestchi:15699case X86::BI__builtin_ia32_ktestzhi:15700case X86::BI__builtin_ia32_ktestcsi:15701case X86::BI__builtin_ia32_ktestzsi:15702case X86::BI__builtin_ia32_ktestcdi:15703case X86::BI__builtin_ia32_ktestzdi: {15704Intrinsic::ID 
IID;15705switch (BuiltinID) {15706default: llvm_unreachable("Unsupported intrinsic!");15707case X86::BI__builtin_ia32_ktestcqi:15708IID = Intrinsic::x86_avx512_ktestc_b;15709break;15710case X86::BI__builtin_ia32_ktestzqi:15711IID = Intrinsic::x86_avx512_ktestz_b;15712break;15713case X86::BI__builtin_ia32_ktestchi:15714IID = Intrinsic::x86_avx512_ktestc_w;15715break;15716case X86::BI__builtin_ia32_ktestzhi:15717IID = Intrinsic::x86_avx512_ktestz_w;15718break;15719case X86::BI__builtin_ia32_ktestcsi:15720IID = Intrinsic::x86_avx512_ktestc_d;15721break;15722case X86::BI__builtin_ia32_ktestzsi:15723IID = Intrinsic::x86_avx512_ktestz_d;15724break;15725case X86::BI__builtin_ia32_ktestcdi:15726IID = Intrinsic::x86_avx512_ktestc_q;15727break;15728case X86::BI__builtin_ia32_ktestzdi:15729IID = Intrinsic::x86_avx512_ktestz_q;15730break;15731}1573215733unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();15734Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);15735Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);15736Function *Intr = CGM.getIntrinsic(IID);15737return Builder.CreateCall(Intr, {LHS, RHS});15738}1573915740case X86::BI__builtin_ia32_kaddqi:15741case X86::BI__builtin_ia32_kaddhi:15742case X86::BI__builtin_ia32_kaddsi:15743case X86::BI__builtin_ia32_kadddi: {15744Intrinsic::ID IID;15745switch (BuiltinID) {15746default: llvm_unreachable("Unsupported intrinsic!");15747case X86::BI__builtin_ia32_kaddqi:15748IID = Intrinsic::x86_avx512_kadd_b;15749break;15750case X86::BI__builtin_ia32_kaddhi:15751IID = Intrinsic::x86_avx512_kadd_w;15752break;15753case X86::BI__builtin_ia32_kaddsi:15754IID = Intrinsic::x86_avx512_kadd_d;15755break;15756case X86::BI__builtin_ia32_kadddi:15757IID = Intrinsic::x86_avx512_kadd_q;15758break;15759}1576015761unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();15762Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);15763Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);15764Function *Intr = 
CGM.getIntrinsic(IID);15765Value *Res = Builder.CreateCall(Intr, {LHS, RHS});15766return Builder.CreateBitCast(Res, Ops[0]->getType());15767}15768case X86::BI__builtin_ia32_kandqi:15769case X86::BI__builtin_ia32_kandhi:15770case X86::BI__builtin_ia32_kandsi:15771case X86::BI__builtin_ia32_kanddi:15772return EmitX86MaskLogic(*this, Instruction::And, Ops);15773case X86::BI__builtin_ia32_kandnqi:15774case X86::BI__builtin_ia32_kandnhi:15775case X86::BI__builtin_ia32_kandnsi:15776case X86::BI__builtin_ia32_kandndi:15777return EmitX86MaskLogic(*this, Instruction::And, Ops, true);15778case X86::BI__builtin_ia32_korqi:15779case X86::BI__builtin_ia32_korhi:15780case X86::BI__builtin_ia32_korsi:15781case X86::BI__builtin_ia32_kordi:15782return EmitX86MaskLogic(*this, Instruction::Or, Ops);15783case X86::BI__builtin_ia32_kxnorqi:15784case X86::BI__builtin_ia32_kxnorhi:15785case X86::BI__builtin_ia32_kxnorsi:15786case X86::BI__builtin_ia32_kxnordi:15787return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);15788case X86::BI__builtin_ia32_kxorqi:15789case X86::BI__builtin_ia32_kxorhi:15790case X86::BI__builtin_ia32_kxorsi:15791case X86::BI__builtin_ia32_kxordi:15792return EmitX86MaskLogic(*this, Instruction::Xor, Ops);15793case X86::BI__builtin_ia32_knotqi:15794case X86::BI__builtin_ia32_knothi:15795case X86::BI__builtin_ia32_knotsi:15796case X86::BI__builtin_ia32_knotdi: {15797unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();15798Value *Res = getMaskVecValue(*this, Ops[0], NumElts);15799return Builder.CreateBitCast(Builder.CreateNot(Res),15800Ops[0]->getType());15801}15802case X86::BI__builtin_ia32_kmovb:15803case X86::BI__builtin_ia32_kmovw:15804case X86::BI__builtin_ia32_kmovd:15805case X86::BI__builtin_ia32_kmovq: {15806// Bitcast to vXi1 type and then back to integer. 
This gets the mask15807// register type into the IR, but might be optimized out depending on15808// what's around it.15809unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();15810Value *Res = getMaskVecValue(*this, Ops[0], NumElts);15811return Builder.CreateBitCast(Res, Ops[0]->getType());15812}1581315814case X86::BI__builtin_ia32_kunpckdi:15815case X86::BI__builtin_ia32_kunpcksi:15816case X86::BI__builtin_ia32_kunpckhi: {15817unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();15818Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);15819Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);15820int Indices[64];15821for (unsigned i = 0; i != NumElts; ++i)15822Indices[i] = i;1582315824// First extract half of each vector. This gives better codegen than15825// doing it in a single shuffle.15826LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));15827RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));15828// Concat the vectors.15829// NOTE: Operands are swapped to match the intrinsic definition.15830Value *Res =15831Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));15832return Builder.CreateBitCast(Res, Ops[0]->getType());15833}1583415835case X86::BI__builtin_ia32_vplzcntd_128:15836case X86::BI__builtin_ia32_vplzcntd_256:15837case X86::BI__builtin_ia32_vplzcntd_512:15838case X86::BI__builtin_ia32_vplzcntq_128:15839case X86::BI__builtin_ia32_vplzcntq_256:15840case X86::BI__builtin_ia32_vplzcntq_512: {15841Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());15842return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});15843}15844case X86::BI__builtin_ia32_sqrtss:15845case X86::BI__builtin_ia32_sqrtsd: {15846Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);15847Function *F;15848if (Builder.getIsFPConstrained()) {15849CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);15850F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,15851A->getType());15852A 
= Builder.CreateConstrainedFPCall(F, {A});15853} else {15854F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());15855A = Builder.CreateCall(F, {A});15856}15857return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);15858}15859case X86::BI__builtin_ia32_sqrtsh_round_mask:15860case X86::BI__builtin_ia32_sqrtsd_round_mask:15861case X86::BI__builtin_ia32_sqrtss_round_mask: {15862unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();15863// Support only if the rounding mode is 4 (AKA CUR_DIRECTION),15864// otherwise keep the intrinsic.15865if (CC != 4) {15866Intrinsic::ID IID;1586715868switch (BuiltinID) {15869default:15870llvm_unreachable("Unsupported intrinsic!");15871case X86::BI__builtin_ia32_sqrtsh_round_mask:15872IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh;15873break;15874case X86::BI__builtin_ia32_sqrtsd_round_mask:15875IID = Intrinsic::x86_avx512_mask_sqrt_sd;15876break;15877case X86::BI__builtin_ia32_sqrtss_round_mask:15878IID = Intrinsic::x86_avx512_mask_sqrt_ss;15879break;15880}15881return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);15882}15883Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);15884Function *F;15885if (Builder.getIsFPConstrained()) {15886CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);15887F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,15888A->getType());15889A = Builder.CreateConstrainedFPCall(F, A);15890} else {15891F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());15892A = Builder.CreateCall(F, A);15893}15894Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);15895A = EmitX86ScalarSelect(*this, Ops[3], A, Src);15896return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);15897}15898case X86::BI__builtin_ia32_sqrtpd256:15899case X86::BI__builtin_ia32_sqrtpd:15900case X86::BI__builtin_ia32_sqrtps256:15901case X86::BI__builtin_ia32_sqrtps:15902case X86::BI__builtin_ia32_sqrtph256:15903case X86::BI__builtin_ia32_sqrtph:15904case X86::BI__builtin_ia32_sqrtph512:15905case 
X86::BI__builtin_ia32_sqrtps512:15906case X86::BI__builtin_ia32_sqrtpd512: {15907if (Ops.size() == 2) {15908unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();15909// Support only if the rounding mode is 4 (AKA CUR_DIRECTION),15910// otherwise keep the intrinsic.15911if (CC != 4) {15912Intrinsic::ID IID;1591315914switch (BuiltinID) {15915default:15916llvm_unreachable("Unsupported intrinsic!");15917case X86::BI__builtin_ia32_sqrtph512:15918IID = Intrinsic::x86_avx512fp16_sqrt_ph_512;15919break;15920case X86::BI__builtin_ia32_sqrtps512:15921IID = Intrinsic::x86_avx512_sqrt_ps_512;15922break;15923case X86::BI__builtin_ia32_sqrtpd512:15924IID = Intrinsic::x86_avx512_sqrt_pd_512;15925break;15926}15927return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);15928}15929}15930if (Builder.getIsFPConstrained()) {15931CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);15932Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt,15933Ops[0]->getType());15934return Builder.CreateConstrainedFPCall(F, Ops[0]);15935} else {15936Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());15937return Builder.CreateCall(F, Ops[0]);15938}15939}1594015941case X86::BI__builtin_ia32_pmuludq128:15942case X86::BI__builtin_ia32_pmuludq256:15943case X86::BI__builtin_ia32_pmuludq512:15944return EmitX86Muldq(*this, /*IsSigned*/false, Ops);1594515946case X86::BI__builtin_ia32_pmuldq128:15947case X86::BI__builtin_ia32_pmuldq256:15948case X86::BI__builtin_ia32_pmuldq512:15949return EmitX86Muldq(*this, /*IsSigned*/true, Ops);1595015951case X86::BI__builtin_ia32_pternlogd512_mask:15952case X86::BI__builtin_ia32_pternlogq512_mask:15953case X86::BI__builtin_ia32_pternlogd128_mask:15954case X86::BI__builtin_ia32_pternlogd256_mask:15955case X86::BI__builtin_ia32_pternlogq128_mask:15956case X86::BI__builtin_ia32_pternlogq256_mask:15957return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);1595815959case X86::BI__builtin_ia32_pternlogd512_maskz:15960case 
X86::BI__builtin_ia32_pternlogq512_maskz:15961case X86::BI__builtin_ia32_pternlogd128_maskz:15962case X86::BI__builtin_ia32_pternlogd256_maskz:15963case X86::BI__builtin_ia32_pternlogq128_maskz:15964case X86::BI__builtin_ia32_pternlogq256_maskz:15965return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);1596615967case X86::BI__builtin_ia32_vpshldd128:15968case X86::BI__builtin_ia32_vpshldd256:15969case X86::BI__builtin_ia32_vpshldd512:15970case X86::BI__builtin_ia32_vpshldq128:15971case X86::BI__builtin_ia32_vpshldq256:15972case X86::BI__builtin_ia32_vpshldq512:15973case X86::BI__builtin_ia32_vpshldw128:15974case X86::BI__builtin_ia32_vpshldw256:15975case X86::BI__builtin_ia32_vpshldw512:15976return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);1597715978case X86::BI__builtin_ia32_vpshrdd128:15979case X86::BI__builtin_ia32_vpshrdd256:15980case X86::BI__builtin_ia32_vpshrdd512:15981case X86::BI__builtin_ia32_vpshrdq128:15982case X86::BI__builtin_ia32_vpshrdq256:15983case X86::BI__builtin_ia32_vpshrdq512:15984case X86::BI__builtin_ia32_vpshrdw128:15985case X86::BI__builtin_ia32_vpshrdw256:15986case X86::BI__builtin_ia32_vpshrdw512:15987// Ops 0 and 1 are swapped.15988return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);1598915990case X86::BI__builtin_ia32_vpshldvd128:15991case X86::BI__builtin_ia32_vpshldvd256:15992case X86::BI__builtin_ia32_vpshldvd512:15993case X86::BI__builtin_ia32_vpshldvq128:15994case X86::BI__builtin_ia32_vpshldvq256:15995case X86::BI__builtin_ia32_vpshldvq512:15996case X86::BI__builtin_ia32_vpshldvw128:15997case X86::BI__builtin_ia32_vpshldvw256:15998case X86::BI__builtin_ia32_vpshldvw512:15999return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);1600016001case X86::BI__builtin_ia32_vpshrdvd128:16002case X86::BI__builtin_ia32_vpshrdvd256:16003case X86::BI__builtin_ia32_vpshrdvd512:16004case X86::BI__builtin_ia32_vpshrdvq128:16005case X86::BI__builtin_ia32_vpshrdvq256:16006case 
X86::BI__builtin_ia32_vpshrdvq512:16007case X86::BI__builtin_ia32_vpshrdvw128:16008case X86::BI__builtin_ia32_vpshrdvw256:16009case X86::BI__builtin_ia32_vpshrdvw512:16010// Ops 0 and 1 are swapped.16011return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);1601216013// Reductions16014case X86::BI__builtin_ia32_reduce_fadd_pd512:16015case X86::BI__builtin_ia32_reduce_fadd_ps512:16016case X86::BI__builtin_ia32_reduce_fadd_ph512:16017case X86::BI__builtin_ia32_reduce_fadd_ph256:16018case X86::BI__builtin_ia32_reduce_fadd_ph128: {16019Function *F =16020CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());16021IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);16022Builder.getFastMathFlags().setAllowReassoc();16023return Builder.CreateCall(F, {Ops[0], Ops[1]});16024}16025case X86::BI__builtin_ia32_reduce_fmul_pd512:16026case X86::BI__builtin_ia32_reduce_fmul_ps512:16027case X86::BI__builtin_ia32_reduce_fmul_ph512:16028case X86::BI__builtin_ia32_reduce_fmul_ph256:16029case X86::BI__builtin_ia32_reduce_fmul_ph128: {16030Function *F =16031CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());16032IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);16033Builder.getFastMathFlags().setAllowReassoc();16034return Builder.CreateCall(F, {Ops[0], Ops[1]});16035}16036case X86::BI__builtin_ia32_reduce_fmax_pd512:16037case X86::BI__builtin_ia32_reduce_fmax_ps512:16038case X86::BI__builtin_ia32_reduce_fmax_ph512:16039case X86::BI__builtin_ia32_reduce_fmax_ph256:16040case X86::BI__builtin_ia32_reduce_fmax_ph128: {16041Function *F =16042CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType());16043IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);16044Builder.getFastMathFlags().setNoNaNs();16045return Builder.CreateCall(F, {Ops[0]});16046}16047case X86::BI__builtin_ia32_reduce_fmin_pd512:16048case X86::BI__builtin_ia32_reduce_fmin_ps512:16049case X86::BI__builtin_ia32_reduce_fmin_ph512:16050case X86::BI__builtin_ia32_reduce_fmin_ph256:16051case 
X86::BI__builtin_ia32_reduce_fmin_ph128: {16052Function *F =16053CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType());16054IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);16055Builder.getFastMathFlags().setNoNaNs();16056return Builder.CreateCall(F, {Ops[0]});16057}1605816059case X86::BI__builtin_ia32_rdrand16_step:16060case X86::BI__builtin_ia32_rdrand32_step:16061case X86::BI__builtin_ia32_rdrand64_step:16062case X86::BI__builtin_ia32_rdseed16_step:16063case X86::BI__builtin_ia32_rdseed32_step:16064case X86::BI__builtin_ia32_rdseed64_step: {16065Intrinsic::ID ID;16066switch (BuiltinID) {16067default: llvm_unreachable("Unsupported intrinsic!");16068case X86::BI__builtin_ia32_rdrand16_step:16069ID = Intrinsic::x86_rdrand_16;16070break;16071case X86::BI__builtin_ia32_rdrand32_step:16072ID = Intrinsic::x86_rdrand_32;16073break;16074case X86::BI__builtin_ia32_rdrand64_step:16075ID = Intrinsic::x86_rdrand_64;16076break;16077case X86::BI__builtin_ia32_rdseed16_step:16078ID = Intrinsic::x86_rdseed_16;16079break;16080case X86::BI__builtin_ia32_rdseed32_step:16081ID = Intrinsic::x86_rdseed_32;16082break;16083case X86::BI__builtin_ia32_rdseed64_step:16084ID = Intrinsic::x86_rdseed_64;16085break;16086}1608716088Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));16089Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),16090Ops[0]);16091return Builder.CreateExtractValue(Call, 1);16092}16093case X86::BI__builtin_ia32_addcarryx_u32:16094case X86::BI__builtin_ia32_addcarryx_u64:16095case X86::BI__builtin_ia32_subborrow_u32:16096case X86::BI__builtin_ia32_subborrow_u64: {16097Intrinsic::ID IID;16098switch (BuiltinID) {16099default: llvm_unreachable("Unsupported intrinsic!");16100case X86::BI__builtin_ia32_addcarryx_u32:16101IID = Intrinsic::x86_addcarry_32;16102break;16103case X86::BI__builtin_ia32_addcarryx_u64:16104IID = Intrinsic::x86_addcarry_64;16105break;16106case X86::BI__builtin_ia32_subborrow_u32:16107IID = 
Intrinsic::x86_subborrow_32;16108break;16109case X86::BI__builtin_ia32_subborrow_u64:16110IID = Intrinsic::x86_subborrow_64;16111break;16112}1611316114Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),16115{ Ops[0], Ops[1], Ops[2] });16116Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),16117Ops[3]);16118return Builder.CreateExtractValue(Call, 0);16119}1612016121case X86::BI__builtin_ia32_fpclassps128_mask:16122case X86::BI__builtin_ia32_fpclassps256_mask:16123case X86::BI__builtin_ia32_fpclassps512_mask:16124case X86::BI__builtin_ia32_fpclassph128_mask:16125case X86::BI__builtin_ia32_fpclassph256_mask:16126case X86::BI__builtin_ia32_fpclassph512_mask:16127case X86::BI__builtin_ia32_fpclasspd128_mask:16128case X86::BI__builtin_ia32_fpclasspd256_mask:16129case X86::BI__builtin_ia32_fpclasspd512_mask: {16130unsigned NumElts =16131cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();16132Value *MaskIn = Ops[2];16133Ops.erase(&Ops[2]);1613416135Intrinsic::ID ID;16136switch (BuiltinID) {16137default: llvm_unreachable("Unsupported intrinsic!");16138case X86::BI__builtin_ia32_fpclassph128_mask:16139ID = Intrinsic::x86_avx512fp16_fpclass_ph_128;16140break;16141case X86::BI__builtin_ia32_fpclassph256_mask:16142ID = Intrinsic::x86_avx512fp16_fpclass_ph_256;16143break;16144case X86::BI__builtin_ia32_fpclassph512_mask:16145ID = Intrinsic::x86_avx512fp16_fpclass_ph_512;16146break;16147case X86::BI__builtin_ia32_fpclassps128_mask:16148ID = Intrinsic::x86_avx512_fpclass_ps_128;16149break;16150case X86::BI__builtin_ia32_fpclassps256_mask:16151ID = Intrinsic::x86_avx512_fpclass_ps_256;16152break;16153case X86::BI__builtin_ia32_fpclassps512_mask:16154ID = Intrinsic::x86_avx512_fpclass_ps_512;16155break;16156case X86::BI__builtin_ia32_fpclasspd128_mask:16157ID = Intrinsic::x86_avx512_fpclass_pd_128;16158break;16159case X86::BI__builtin_ia32_fpclasspd256_mask:16160ID = Intrinsic::x86_avx512_fpclass_pd_256;16161break;16162case 
X86::BI__builtin_ia32_fpclasspd512_mask:16163ID = Intrinsic::x86_avx512_fpclass_pd_512;16164break;16165}1616616167Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);16168return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);16169}1617016171case X86::BI__builtin_ia32_vp2intersect_q_512:16172case X86::BI__builtin_ia32_vp2intersect_q_256:16173case X86::BI__builtin_ia32_vp2intersect_q_128:16174case X86::BI__builtin_ia32_vp2intersect_d_512:16175case X86::BI__builtin_ia32_vp2intersect_d_256:16176case X86::BI__builtin_ia32_vp2intersect_d_128: {16177unsigned NumElts =16178cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();16179Intrinsic::ID ID;1618016181switch (BuiltinID) {16182default: llvm_unreachable("Unsupported intrinsic!");16183case X86::BI__builtin_ia32_vp2intersect_q_512:16184ID = Intrinsic::x86_avx512_vp2intersect_q_512;16185break;16186case X86::BI__builtin_ia32_vp2intersect_q_256:16187ID = Intrinsic::x86_avx512_vp2intersect_q_256;16188break;16189case X86::BI__builtin_ia32_vp2intersect_q_128:16190ID = Intrinsic::x86_avx512_vp2intersect_q_128;16191break;16192case X86::BI__builtin_ia32_vp2intersect_d_512:16193ID = Intrinsic::x86_avx512_vp2intersect_d_512;16194break;16195case X86::BI__builtin_ia32_vp2intersect_d_256:16196ID = Intrinsic::x86_avx512_vp2intersect_d_256;16197break;16198case X86::BI__builtin_ia32_vp2intersect_d_128:16199ID = Intrinsic::x86_avx512_vp2intersect_d_128;16200break;16201}1620216203Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});16204Value *Result = Builder.CreateExtractValue(Call, 0);16205Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);16206Builder.CreateDefaultAlignedStore(Result, Ops[2]);1620716208Result = Builder.CreateExtractValue(Call, 1);16209Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);16210return Builder.CreateDefaultAlignedStore(Result, Ops[3]);16211}1621216213case X86::BI__builtin_ia32_vpmultishiftqb128:16214case 
X86::BI__builtin_ia32_vpmultishiftqb256:16215case X86::BI__builtin_ia32_vpmultishiftqb512: {16216Intrinsic::ID ID;16217switch (BuiltinID) {16218default: llvm_unreachable("Unsupported intrinsic!");16219case X86::BI__builtin_ia32_vpmultishiftqb128:16220ID = Intrinsic::x86_avx512_pmultishift_qb_128;16221break;16222case X86::BI__builtin_ia32_vpmultishiftqb256:16223ID = Intrinsic::x86_avx512_pmultishift_qb_256;16224break;16225case X86::BI__builtin_ia32_vpmultishiftqb512:16226ID = Intrinsic::x86_avx512_pmultishift_qb_512;16227break;16228}1622916230return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);16231}1623216233case X86::BI__builtin_ia32_vpshufbitqmb128_mask:16234case X86::BI__builtin_ia32_vpshufbitqmb256_mask:16235case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {16236unsigned NumElts =16237cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();16238Value *MaskIn = Ops[2];16239Ops.erase(&Ops[2]);1624016241Intrinsic::ID ID;16242switch (BuiltinID) {16243default: llvm_unreachable("Unsupported intrinsic!");16244case X86::BI__builtin_ia32_vpshufbitqmb128_mask:16245ID = Intrinsic::x86_avx512_vpshufbitqmb_128;16246break;16247case X86::BI__builtin_ia32_vpshufbitqmb256_mask:16248ID = Intrinsic::x86_avx512_vpshufbitqmb_256;16249break;16250case X86::BI__builtin_ia32_vpshufbitqmb512_mask:16251ID = Intrinsic::x86_avx512_vpshufbitqmb_512;16252break;16253}1625416255Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);16256return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);16257}1625816259// packed comparison intrinsics16260case X86::BI__builtin_ia32_cmpeqps:16261case X86::BI__builtin_ia32_cmpeqpd:16262return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);16263case X86::BI__builtin_ia32_cmpltps:16264case X86::BI__builtin_ia32_cmpltpd:16265return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);16266case X86::BI__builtin_ia32_cmpleps:16267case X86::BI__builtin_ia32_cmplepd:16268return getVectorFCmpIR(CmpInst::FCMP_OLE, 
/*IsSignaling*/true);16269case X86::BI__builtin_ia32_cmpunordps:16270case X86::BI__builtin_ia32_cmpunordpd:16271return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);16272case X86::BI__builtin_ia32_cmpneqps:16273case X86::BI__builtin_ia32_cmpneqpd:16274return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);16275case X86::BI__builtin_ia32_cmpnltps:16276case X86::BI__builtin_ia32_cmpnltpd:16277return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);16278case X86::BI__builtin_ia32_cmpnleps:16279case X86::BI__builtin_ia32_cmpnlepd:16280return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);16281case X86::BI__builtin_ia32_cmpordps:16282case X86::BI__builtin_ia32_cmpordpd:16283return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);16284case X86::BI__builtin_ia32_cmpph128_mask:16285case X86::BI__builtin_ia32_cmpph256_mask:16286case X86::BI__builtin_ia32_cmpph512_mask:16287case X86::BI__builtin_ia32_cmpps128_mask:16288case X86::BI__builtin_ia32_cmpps256_mask:16289case X86::BI__builtin_ia32_cmpps512_mask:16290case X86::BI__builtin_ia32_cmppd128_mask:16291case X86::BI__builtin_ia32_cmppd256_mask:16292case X86::BI__builtin_ia32_cmppd512_mask:16293IsMaskFCmp = true;16294[[fallthrough]];16295case X86::BI__builtin_ia32_cmpps:16296case X86::BI__builtin_ia32_cmpps256:16297case X86::BI__builtin_ia32_cmppd:16298case X86::BI__builtin_ia32_cmppd256: {16299// Lowering vector comparisons to fcmp instructions, while16300// ignoring signalling behaviour requested16301// ignoring rounding mode requested16302// This is only possible if fp-model is not strict and FENV_ACCESS is off.1630316304// The third argument is the comparison condition, and integer in the16305// range [0, 31]16306unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;1630716308// Lowering to IR fcmp instruction.16309// Ignoring requested signaling behaviour,16310// e.g. 
both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.16311FCmpInst::Predicate Pred;16312bool IsSignaling;16313// Predicates for 16-31 repeat the 0-15 predicates. Only the signalling16314// behavior is inverted. We'll handle that after the switch.16315switch (CC & 0xf) {16316case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break;16317case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break;16318case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break;16319case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break;16320case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break;16321case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break;16322case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break;16323case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break;16324case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break;16325case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break;16326case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break;16327case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;16328case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break;16329case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break;16330case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break;16331case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break;16332default: llvm_unreachable("Unhandled CC");16333}1633416335// Invert the signalling behavior for 16-31.16336if (CC & 0x10)16337IsSignaling = !IsSignaling;1633816339// If the predicate is true or false and we're using constrained intrinsics,16340// we don't have a compare intrinsic we can use. 
Just use the legacy X8616341// specific intrinsic.16342// If the intrinsic is mask enabled and we're using constrained intrinsics,16343// use the legacy X86 specific intrinsic.16344if (Builder.getIsFPConstrained() &&16345(Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE ||16346IsMaskFCmp)) {1634716348Intrinsic::ID IID;16349switch (BuiltinID) {16350default: llvm_unreachable("Unexpected builtin");16351case X86::BI__builtin_ia32_cmpps:16352IID = Intrinsic::x86_sse_cmp_ps;16353break;16354case X86::BI__builtin_ia32_cmpps256:16355IID = Intrinsic::x86_avx_cmp_ps_256;16356break;16357case X86::BI__builtin_ia32_cmppd:16358IID = Intrinsic::x86_sse2_cmp_pd;16359break;16360case X86::BI__builtin_ia32_cmppd256:16361IID = Intrinsic::x86_avx_cmp_pd_256;16362break;16363case X86::BI__builtin_ia32_cmpph128_mask:16364IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128;16365break;16366case X86::BI__builtin_ia32_cmpph256_mask:16367IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256;16368break;16369case X86::BI__builtin_ia32_cmpph512_mask:16370IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512;16371break;16372case X86::BI__builtin_ia32_cmpps512_mask:16373IID = Intrinsic::x86_avx512_mask_cmp_ps_512;16374break;16375case X86::BI__builtin_ia32_cmppd512_mask:16376IID = Intrinsic::x86_avx512_mask_cmp_pd_512;16377break;16378case X86::BI__builtin_ia32_cmpps128_mask:16379IID = Intrinsic::x86_avx512_mask_cmp_ps_128;16380break;16381case X86::BI__builtin_ia32_cmpps256_mask:16382IID = Intrinsic::x86_avx512_mask_cmp_ps_256;16383break;16384case X86::BI__builtin_ia32_cmppd128_mask:16385IID = Intrinsic::x86_avx512_mask_cmp_pd_128;16386break;16387case X86::BI__builtin_ia32_cmppd256_mask:16388IID = Intrinsic::x86_avx512_mask_cmp_pd_256;16389break;16390}1639116392Function *Intr = CGM.getIntrinsic(IID);16393if (IsMaskFCmp) {16394unsigned NumElts =16395cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();16396Ops[3] = getMaskVecValue(*this, Ops[3], NumElts);16397Value *Cmp = 
Builder.CreateCall(Intr, Ops);16398return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr);16399}1640016401return Builder.CreateCall(Intr, Ops);16402}1640316404// Builtins without the _mask suffix return a vector of integers16405// of the same width as the input vectors16406if (IsMaskFCmp) {16407// We ignore SAE if strict FP is disabled. We only keep precise16408// exception behavior under strict FP.16409// NOTE: If strict FP does ever go through here a CGFPOptionsRAII16410// object will be required.16411unsigned NumElts =16412cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();16413Value *Cmp;16414if (IsSignaling)16415Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);16416else16417Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);16418return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);16419}1642016421return getVectorFCmpIR(Pred, IsSignaling);16422}1642316424// SSE scalar comparison intrinsics16425case X86::BI__builtin_ia32_cmpeqss:16426return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);16427case X86::BI__builtin_ia32_cmpltss:16428return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);16429case X86::BI__builtin_ia32_cmpless:16430return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);16431case X86::BI__builtin_ia32_cmpunordss:16432return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);16433case X86::BI__builtin_ia32_cmpneqss:16434return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);16435case X86::BI__builtin_ia32_cmpnltss:16436return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);16437case X86::BI__builtin_ia32_cmpnless:16438return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);16439case X86::BI__builtin_ia32_cmpordss:16440return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);16441case X86::BI__builtin_ia32_cmpeqsd:16442return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);16443case X86::BI__builtin_ia32_cmpltsd:16444return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);16445case 
X86::BI__builtin_ia32_cmplesd:16446return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);16447case X86::BI__builtin_ia32_cmpunordsd:16448return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);16449case X86::BI__builtin_ia32_cmpneqsd:16450return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);16451case X86::BI__builtin_ia32_cmpnltsd:16452return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);16453case X86::BI__builtin_ia32_cmpnlesd:16454return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);16455case X86::BI__builtin_ia32_cmpordsd:16456return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);1645716458// f16c half2float intrinsics16459case X86::BI__builtin_ia32_vcvtph2ps:16460case X86::BI__builtin_ia32_vcvtph2ps256:16461case X86::BI__builtin_ia32_vcvtph2ps_mask:16462case X86::BI__builtin_ia32_vcvtph2ps256_mask:16463case X86::BI__builtin_ia32_vcvtph2ps512_mask: {16464CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);16465return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType()));16466}1646716468// AVX512 bf16 intrinsics16469case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {16470Ops[2] = getMaskVecValue(16471*this, Ops[2],16472cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements());16473Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;16474return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);16475}16476case X86::BI__builtin_ia32_cvtsbf162ss_32:16477return Builder.CreateFPExt(Ops[0], Builder.getFloatTy());1647816479case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:16480case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {16481Intrinsic::ID IID;16482switch (BuiltinID) {16483default: llvm_unreachable("Unsupported intrinsic!");16484case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:16485IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;16486break;16487case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:16488IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;16489break;16490}16491Value *Res = 
Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);16492return EmitX86Select(*this, Ops[2], Res, Ops[1]);16493}1649416495case X86::BI__cpuid:16496case X86::BI__cpuidex: {16497Value *FuncId = EmitScalarExpr(E->getArg(1));16498Value *SubFuncId = BuiltinID == X86::BI__cpuidex16499? EmitScalarExpr(E->getArg(2))16500: llvm::ConstantInt::get(Int32Ty, 0);1650116502llvm::StructType *CpuidRetTy =16503llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);16504llvm::FunctionType *FTy =16505llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);1650616507StringRef Asm, Constraints;16508if (getTarget().getTriple().getArch() == llvm::Triple::x86) {16509Asm = "cpuid";16510Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";16511} else {16512// x86-64 uses %rbx as the base register, so preserve it.16513Asm = "xchgq %rbx, ${1:q}\n"16514"cpuid\n"16515"xchgq %rbx, ${1:q}";16516Constraints = "={ax},=r,={cx},={dx},0,2";16517}1651816519llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,16520/*hasSideEffects=*/false);16521Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});16522Value *BasePtr = EmitScalarExpr(E->getArg(0));16523Value *Store = nullptr;16524for (unsigned i = 0; i < 4; i++) {16525Value *Extracted = Builder.CreateExtractValue(IACall, i);16526Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);16527Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());16528}1652916530// Return the last store instruction to signal that we have emitted the16531// the intrinsic.16532return Store;16533}1653416535case X86::BI__emul:16536case X86::BI__emulu: {16537llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);16538bool isSigned = (BuiltinID == X86::BI__emul);16539Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);16540Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);16541return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);16542}16543case X86::BI__mulh:16544case 
X86::BI__umulh:16545case X86::BI_mul128:16546case X86::BI_umul128: {16547llvm::Type *ResType = ConvertType(E->getType());16548llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);1654916550bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);16551Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);16552Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);1655316554Value *MulResult, *HigherBits;16555if (IsSigned) {16556MulResult = Builder.CreateNSWMul(LHS, RHS);16557HigherBits = Builder.CreateAShr(MulResult, 64);16558} else {16559MulResult = Builder.CreateNUWMul(LHS, RHS);16560HigherBits = Builder.CreateLShr(MulResult, 64);16561}16562HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);1656316564if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)16565return HigherBits;1656616567Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));16568Builder.CreateStore(HigherBits, HighBitsAddress);16569return Builder.CreateIntCast(MulResult, ResType, IsSigned);16570}1657116572case X86::BI__faststorefence: {16573return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,16574llvm::SyncScope::System);16575}16576case X86::BI__shiftleft128:16577case X86::BI__shiftright128: {16578llvm::Function *F = CGM.getIntrinsic(16579BuiltinID == X86::BI__shiftleft128 ? 
Intrinsic::fshl : Intrinsic::fshr,16580Int64Ty);16581// Flip low/high ops and zero-extend amount to matching type.16582// shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt)16583// shiftright128(Low, High, Amt) -> fshr(High, Low, Amt)16584std::swap(Ops[0], Ops[1]);16585Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);16586return Builder.CreateCall(F, Ops);16587}16588case X86::BI_ReadWriteBarrier:16589case X86::BI_ReadBarrier:16590case X86::BI_WriteBarrier: {16591return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,16592llvm::SyncScope::SingleThread);16593}1659416595case X86::BI_AddressOfReturnAddress: {16596Function *F =16597CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy);16598return Builder.CreateCall(F);16599}16600case X86::BI__stosb: {16601// We treat __stosb as a volatile memset - it may not generate "rep stosb"16602// instruction, but it will create a memset that won't be optimized away.16603return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true);16604}16605case X86::BI__ud2:16606// llvm.trap makes a ud2a instruction on x86.16607return EmitTrapCall(Intrinsic::trap);16608case X86::BI__int2c: {16609// This syscall signals a driver assertion failure in x86 NT kernels.16610llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);16611llvm::InlineAsm *IA =16612llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);16613llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(16614getLLVMContext(), llvm::AttributeList::FunctionIndex,16615llvm::Attribute::NoReturn);16616llvm::CallInst *CI = Builder.CreateCall(IA);16617CI->setAttributes(NoReturnAttr);16618return CI;16619}16620case X86::BI__readfsbyte:16621case X86::BI__readfsword:16622case X86::BI__readfsdword:16623case X86::BI__readfsqword: {16624llvm::Type *IntTy = ConvertType(E->getType());16625Value *Ptr = Builder.CreateIntToPtr(16626Ops[0], llvm::PointerType::get(getLLVMContext(), 257));16627LoadInst *Load = 
Builder.CreateAlignedLoad(16628IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));16629Load->setVolatile(true);16630return Load;16631}16632case X86::BI__readgsbyte:16633case X86::BI__readgsword:16634case X86::BI__readgsdword:16635case X86::BI__readgsqword: {16636llvm::Type *IntTy = ConvertType(E->getType());16637Value *Ptr = Builder.CreateIntToPtr(16638Ops[0], llvm::PointerType::get(getLLVMContext(), 256));16639LoadInst *Load = Builder.CreateAlignedLoad(16640IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));16641Load->setVolatile(true);16642return Load;16643}16644case X86::BI__builtin_ia32_encodekey128_u32: {16645Intrinsic::ID IID = Intrinsic::x86_encodekey128;1664616647Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]});1664816649for (int i = 0; i < 3; ++i) {16650Value *Extract = Builder.CreateExtractValue(Call, i + 1);16651Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16);16652Builder.CreateAlignedStore(Extract, Ptr, Align(1));16653}1665416655return Builder.CreateExtractValue(Call, 0);16656}16657case X86::BI__builtin_ia32_encodekey256_u32: {16658Intrinsic::ID IID = Intrinsic::x86_encodekey256;1665916660Value *Call =16661Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]});1666216663for (int i = 0; i < 4; ++i) {16664Value *Extract = Builder.CreateExtractValue(Call, i + 1);16665Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16);16666Builder.CreateAlignedStore(Extract, Ptr, Align(1));16667}1666816669return Builder.CreateExtractValue(Call, 0);16670}16671case X86::BI__builtin_ia32_aesenc128kl_u8:16672case X86::BI__builtin_ia32_aesdec128kl_u8:16673case X86::BI__builtin_ia32_aesenc256kl_u8:16674case X86::BI__builtin_ia32_aesdec256kl_u8: {16675Intrinsic::ID IID;16676StringRef BlockName;16677switch (BuiltinID) {16678default:16679llvm_unreachable("Unexpected builtin");16680case X86::BI__builtin_ia32_aesenc128kl_u8:16681IID = Intrinsic::x86_aesenc128kl;16682BlockName = 
"aesenc128kl";16683break;16684case X86::BI__builtin_ia32_aesdec128kl_u8:16685IID = Intrinsic::x86_aesdec128kl;16686BlockName = "aesdec128kl";16687break;16688case X86::BI__builtin_ia32_aesenc256kl_u8:16689IID = Intrinsic::x86_aesenc256kl;16690BlockName = "aesenc256kl";16691break;16692case X86::BI__builtin_ia32_aesdec256kl_u8:16693IID = Intrinsic::x86_aesdec256kl;16694BlockName = "aesdec256kl";16695break;16696}1669716698Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]});1669916700BasicBlock *NoError =16701createBasicBlock(BlockName + "_no_error", this->CurFn);16702BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);16703BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);1670416705Value *Ret = Builder.CreateExtractValue(Call, 0);16706Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());16707Value *Out = Builder.CreateExtractValue(Call, 1);16708Builder.CreateCondBr(Succ, NoError, Error);1670916710Builder.SetInsertPoint(NoError);16711Builder.CreateDefaultAlignedStore(Out, Ops[0]);16712Builder.CreateBr(End);1671316714Builder.SetInsertPoint(Error);16715Constant *Zero = llvm::Constant::getNullValue(Out->getType());16716Builder.CreateDefaultAlignedStore(Zero, Ops[0]);16717Builder.CreateBr(End);1671816719Builder.SetInsertPoint(End);16720return Builder.CreateExtractValue(Call, 0);16721}16722case X86::BI__builtin_ia32_aesencwide128kl_u8:16723case X86::BI__builtin_ia32_aesdecwide128kl_u8:16724case X86::BI__builtin_ia32_aesencwide256kl_u8:16725case X86::BI__builtin_ia32_aesdecwide256kl_u8: {16726Intrinsic::ID IID;16727StringRef BlockName;16728switch (BuiltinID) {16729case X86::BI__builtin_ia32_aesencwide128kl_u8:16730IID = Intrinsic::x86_aesencwide128kl;16731BlockName = "aesencwide128kl";16732break;16733case X86::BI__builtin_ia32_aesdecwide128kl_u8:16734IID = Intrinsic::x86_aesdecwide128kl;16735BlockName = "aesdecwide128kl";16736break;16737case X86::BI__builtin_ia32_aesencwide256kl_u8:16738IID = 
Intrinsic::x86_aesencwide256kl;16739BlockName = "aesencwide256kl";16740break;16741case X86::BI__builtin_ia32_aesdecwide256kl_u8:16742IID = Intrinsic::x86_aesdecwide256kl;16743BlockName = "aesdecwide256kl";16744break;16745}1674616747llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2);16748Value *InOps[9];16749InOps[0] = Ops[2];16750for (int i = 0; i != 8; ++i) {16751Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i);16752InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16));16753}1675416755Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps);1675616757BasicBlock *NoError =16758createBasicBlock(BlockName + "_no_error", this->CurFn);16759BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn);16760BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn);1676116762Value *Ret = Builder.CreateExtractValue(Call, 0);16763Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty());16764Builder.CreateCondBr(Succ, NoError, Error);1676516766Builder.SetInsertPoint(NoError);16767for (int i = 0; i != 8; ++i) {16768Value *Extract = Builder.CreateExtractValue(Call, i + 1);16769Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i);16770Builder.CreateAlignedStore(Extract, Ptr, Align(16));16771}16772Builder.CreateBr(End);1677316774Builder.SetInsertPoint(Error);16775for (int i = 0; i != 8; ++i) {16776Value *Out = Builder.CreateExtractValue(Call, i + 1);16777Constant *Zero = llvm::Constant::getNullValue(Out->getType());16778Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i);16779Builder.CreateAlignedStore(Zero, Ptr, Align(16));16780}16781Builder.CreateBr(End);1678216783Builder.SetInsertPoint(End);16784return Builder.CreateExtractValue(Call, 0);16785}16786case X86::BI__builtin_ia32_vfcmaddcph512_mask:16787IsConjFMA = true;16788[[fallthrough]];16789case X86::BI__builtin_ia32_vfmaddcph512_mask: {16790Intrinsic::ID IID = IsConjFMA16791? 
Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_51216792: Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;16793Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);16794return EmitX86Select(*this, Ops[3], Call, Ops[0]);16795}16796case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:16797IsConjFMA = true;16798[[fallthrough]];16799case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {16800Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh16801: Intrinsic::x86_avx512fp16_mask_vfmadd_csh;16802Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);16803Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));16804return EmitX86Select(*this, And, Call, Ops[0]);16805}16806case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:16807IsConjFMA = true;16808[[fallthrough]];16809case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {16810Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh16811: Intrinsic::x86_avx512fp16_mask_vfmadd_csh;16812Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);16813static constexpr int Mask[] = {0, 5, 6, 7};16814return Builder.CreateShuffleVector(Call, Ops[2], Mask);16815}16816case X86::BI__builtin_ia32_prefetchi:16817return Builder.CreateCall(16818CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()),16819{Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1],16820llvm::ConstantInt::get(Int32Ty, 0)});16821}16822}1682316824Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,16825const CallExpr *E) {16826// Do not emit the builtin arguments in the arguments of a function call,16827// because the evaluation order of function arguments is not specified in C++.16828// This is important when testing to ensure the arguments are emitted in the16829// same order every time. 
Eg:16830// Instead of:16831// return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)),16832// EmitScalarExpr(E->getArg(1)), "swdiv");16833// Use:16834// Value *Op0 = EmitScalarExpr(E->getArg(0));16835// Value *Op1 = EmitScalarExpr(E->getArg(1));16836// return Builder.CreateFDiv(Op0, Op1, "swdiv")1683716838Intrinsic::ID ID = Intrinsic::not_intrinsic;1683916840#include "llvm/TargetParser/PPCTargetParser.def"16841auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx,16842unsigned Mask, CmpInst::Predicate CompOp,16843unsigned OpValue) -> Value * {16844if (SupportMethod == BUILTIN_PPC_FALSE)16845return llvm::ConstantInt::getFalse(ConvertType(E->getType()));1684616847if (SupportMethod == BUILTIN_PPC_TRUE)16848return llvm::ConstantInt::getTrue(ConvertType(E->getType()));1684916850assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod.");1685116852llvm::Value *FieldValue = nullptr;16853if (SupportMethod == USE_SYS_CONF) {16854llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE);16855llvm::Constant *SysConf =16856CGM.CreateRuntimeVariable(STy, "_system_configuration");1685716858// Grab the appropriate field from _system_configuration.16859llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),16860ConstantInt::get(Int32Ty, FieldIdx)};1686116862FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs);16863FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue,16864CharUnits::fromQuantity(4));16865} else if (SupportMethod == SYS_CALL) {16866llvm::FunctionType *FTy =16867llvm::FunctionType::get(Int64Ty, Int32Ty, false);16868llvm::FunctionCallee Func =16869CGM.CreateRuntimeFunction(FTy, "getsystemcfg");1687016871FieldValue =16872Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)});16873}16874assert(FieldValue &&16875"SupportMethod value is not defined in PPCTargetParser.def.");1687616877if (Mask)16878FieldValue = Builder.CreateAnd(FieldValue, Mask);1687916880llvm::Type *ValueType = FieldValue->getType();16881bool 
IsValueType64Bit = ValueType->isIntegerTy(64);16882assert(16883(IsValueType64Bit || ValueType->isIntegerTy(32)) &&16884"Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr().");1688516886return Builder.CreateICmp(16887CompOp, FieldValue,16888ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue));16889};1689016891switch (BuiltinID) {16892default: return nullptr;1689316894case Builtin::BI__builtin_cpu_is: {16895const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();16896StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();16897llvm::Triple Triple = getTarget().getTriple();1689816899unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue;16900typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo;1690116902std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) =16903static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr)16904#define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \16905AIXID) \16906.Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID})16907#include "llvm/TargetParser/PPCTargetParser.def"16908.Default({BUILTIN_PPC_UNSUPPORTED, 0,16909BUILTIN_PPC_UNSUPPORTED, 0}));1691016911if (Triple.isOSAIX()) {16912assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&16913"Invalid CPU name. Missed by SemaChecking?");16914return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0,16915ICmpInst::ICMP_EQ, AIXIDValue);16916}1691716918assert(Triple.isOSLinux() &&16919"__builtin_cpu_is() is only supported for AIX and Linux.");1692016921assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) &&16922"Invalid CPU name. 
Missed by SemaChecking?");1692316924if (LinuxSupportMethod == BUILTIN_PPC_FALSE)16925return llvm::ConstantInt::getFalse(ConvertType(E->getType()));1692616927Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID);16928llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);16929Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is");16930return Builder.CreateICmpEQ(TheCall,16931llvm::ConstantInt::get(Int32Ty, LinuxIDValue));16932}16933case Builtin::BI__builtin_cpu_supports: {16934llvm::Triple Triple = getTarget().getTriple();16935const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();16936StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();16937if (Triple.isOSAIX()) {16938unsigned SupportMethod, FieldIdx, Mask, Value;16939CmpInst::Predicate CompOp;16940typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate,16941unsigned>16942CPUSupportType;16943std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) =16944static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr)16945#define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \16946VALUE) \16947.Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE})16948#include "llvm/TargetParser/PPCTargetParser.def"16949.Default({BUILTIN_PPC_FALSE, 0, 0,16950CmpInst::Predicate(), 0}));16951return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp,16952Value);16953}1695416955assert(Triple.isOSLinux() &&16956"__builtin_cpu_supports() is only supported for AIX and Linux.");16957unsigned FeatureWord;16958unsigned BitMask;16959std::tie(FeatureWord, BitMask) =16960StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)16961#define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \16962.Case(Name, {FA_WORD, Bitmask})16963#include "llvm/TargetParser/PPCTargetParser.def"16964.Default({0, 0});16965if (!BitMask)16966return Builder.getFalse();16967Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord);16968llvm::Function *F = 
CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld);16969Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports");16970Value *Mask =16971Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask));16972return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty));16973#undef PPC_FAWORD_HWCAP16974#undef PPC_FAWORD_HWCAP216975#undef PPC_FAWORD_CPUID16976}1697716978// __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we16979// call __builtin_readcyclecounter.16980case PPC::BI__builtin_ppc_get_timebase:16981return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));1698216983// vec_ld, vec_xl_be, vec_lvsl, vec_lvsr16984case PPC::BI__builtin_altivec_lvx:16985case PPC::BI__builtin_altivec_lvxl:16986case PPC::BI__builtin_altivec_lvebx:16987case PPC::BI__builtin_altivec_lvehx:16988case PPC::BI__builtin_altivec_lvewx:16989case PPC::BI__builtin_altivec_lvsl:16990case PPC::BI__builtin_altivec_lvsr:16991case PPC::BI__builtin_vsx_lxvd2x:16992case PPC::BI__builtin_vsx_lxvw4x:16993case PPC::BI__builtin_vsx_lxvd2x_be:16994case PPC::BI__builtin_vsx_lxvw4x_be:16995case PPC::BI__builtin_vsx_lxvl:16996case PPC::BI__builtin_vsx_lxvll:16997{16998SmallVector<Value *, 2> Ops;16999Ops.push_back(EmitScalarExpr(E->getArg(0)));17000Ops.push_back(EmitScalarExpr(E->getArg(1)));17001if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl ||17002BuiltinID == PPC::BI__builtin_vsx_lxvll)) {17003Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);17004Ops.pop_back();17005}1700617007switch (BuiltinID) {17008default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");17009case PPC::BI__builtin_altivec_lvx:17010ID = Intrinsic::ppc_altivec_lvx;17011break;17012case PPC::BI__builtin_altivec_lvxl:17013ID = Intrinsic::ppc_altivec_lvxl;17014break;17015case PPC::BI__builtin_altivec_lvebx:17016ID = Intrinsic::ppc_altivec_lvebx;17017break;17018case PPC::BI__builtin_altivec_lvehx:17019ID = Intrinsic::ppc_altivec_lvehx;17020break;17021case 
PPC::BI__builtin_altivec_lvewx:17022ID = Intrinsic::ppc_altivec_lvewx;17023break;17024case PPC::BI__builtin_altivec_lvsl:17025ID = Intrinsic::ppc_altivec_lvsl;17026break;17027case PPC::BI__builtin_altivec_lvsr:17028ID = Intrinsic::ppc_altivec_lvsr;17029break;17030case PPC::BI__builtin_vsx_lxvd2x:17031ID = Intrinsic::ppc_vsx_lxvd2x;17032break;17033case PPC::BI__builtin_vsx_lxvw4x:17034ID = Intrinsic::ppc_vsx_lxvw4x;17035break;17036case PPC::BI__builtin_vsx_lxvd2x_be:17037ID = Intrinsic::ppc_vsx_lxvd2x_be;17038break;17039case PPC::BI__builtin_vsx_lxvw4x_be:17040ID = Intrinsic::ppc_vsx_lxvw4x_be;17041break;17042case PPC::BI__builtin_vsx_lxvl:17043ID = Intrinsic::ppc_vsx_lxvl;17044break;17045case PPC::BI__builtin_vsx_lxvll:17046ID = Intrinsic::ppc_vsx_lxvll;17047break;17048}17049llvm::Function *F = CGM.getIntrinsic(ID);17050return Builder.CreateCall(F, Ops, "");17051}1705217053// vec_st, vec_xst_be17054case PPC::BI__builtin_altivec_stvx:17055case PPC::BI__builtin_altivec_stvxl:17056case PPC::BI__builtin_altivec_stvebx:17057case PPC::BI__builtin_altivec_stvehx:17058case PPC::BI__builtin_altivec_stvewx:17059case PPC::BI__builtin_vsx_stxvd2x:17060case PPC::BI__builtin_vsx_stxvw4x:17061case PPC::BI__builtin_vsx_stxvd2x_be:17062case PPC::BI__builtin_vsx_stxvw4x_be:17063case PPC::BI__builtin_vsx_stxvl:17064case PPC::BI__builtin_vsx_stxvll:17065{17066SmallVector<Value *, 3> Ops;17067Ops.push_back(EmitScalarExpr(E->getArg(0)));17068Ops.push_back(EmitScalarExpr(E->getArg(1)));17069Ops.push_back(EmitScalarExpr(E->getArg(2)));17070if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl ||17071BuiltinID == PPC::BI__builtin_vsx_stxvll)) {17072Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);17073Ops.pop_back();17074}1707517076switch (BuiltinID) {17077default: llvm_unreachable("Unsupported st intrinsic!");17078case PPC::BI__builtin_altivec_stvx:17079ID = Intrinsic::ppc_altivec_stvx;17080break;17081case PPC::BI__builtin_altivec_stvxl:17082ID = 
Intrinsic::ppc_altivec_stvxl;17083break;17084case PPC::BI__builtin_altivec_stvebx:17085ID = Intrinsic::ppc_altivec_stvebx;17086break;17087case PPC::BI__builtin_altivec_stvehx:17088ID = Intrinsic::ppc_altivec_stvehx;17089break;17090case PPC::BI__builtin_altivec_stvewx:17091ID = Intrinsic::ppc_altivec_stvewx;17092break;17093case PPC::BI__builtin_vsx_stxvd2x:17094ID = Intrinsic::ppc_vsx_stxvd2x;17095break;17096case PPC::BI__builtin_vsx_stxvw4x:17097ID = Intrinsic::ppc_vsx_stxvw4x;17098break;17099case PPC::BI__builtin_vsx_stxvd2x_be:17100ID = Intrinsic::ppc_vsx_stxvd2x_be;17101break;17102case PPC::BI__builtin_vsx_stxvw4x_be:17103ID = Intrinsic::ppc_vsx_stxvw4x_be;17104break;17105case PPC::BI__builtin_vsx_stxvl:17106ID = Intrinsic::ppc_vsx_stxvl;17107break;17108case PPC::BI__builtin_vsx_stxvll:17109ID = Intrinsic::ppc_vsx_stxvll;17110break;17111}17112llvm::Function *F = CGM.getIntrinsic(ID);17113return Builder.CreateCall(F, Ops, "");17114}17115case PPC::BI__builtin_vsx_ldrmb: {17116// Essentially boils down to performing an unaligned VMX load sequence so17117// as to avoid crossing a page boundary and then shuffling the elements17118// into the right side of the vector register.17119Value *Op0 = EmitScalarExpr(E->getArg(0));17120Value *Op1 = EmitScalarExpr(E->getArg(1));17121int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();17122llvm::Type *ResTy = ConvertType(E->getType());17123bool IsLE = getTarget().isLittleEndian();1712417125// If the user wants the entire vector, just load the entire vector.17126if (NumBytes == 16) {17127Value *LD =17128Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1)));17129if (!IsLE)17130return LD;1713117132// Reverse the bytes on LE.17133SmallVector<int, 16> RevMask;17134for (int Idx = 0; Idx < 16; Idx++)17135RevMask.push_back(15 - Idx);17136return Builder.CreateShuffleVector(LD, LD, RevMask);17137}1713817139llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx);17140llvm::Function *Lvs = 
CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr17141: Intrinsic::ppc_altivec_lvsl);17142llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm);17143Value *HiMem = Builder.CreateGEP(17144Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1));17145Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo");17146Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi");17147Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1");1714817149Op0 = IsLE ? HiLd : LoLd;17150Op1 = IsLE ? LoLd : HiLd;17151Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1");17152Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType());1715317154if (IsLE) {17155SmallVector<int, 16> Consts;17156for (int Idx = 0; Idx < 16; Idx++) {17157int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1)17158: 16 - (NumBytes - Idx);17159Consts.push_back(Val);17160}17161return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy),17162Zero, Consts);17163}17164SmallVector<Constant *, 16> Consts;17165for (int Idx = 0; Idx < 16; Idx++)17166Consts.push_back(Builder.getInt8(NumBytes + Idx));17167Value *Mask2 = ConstantVector::get(Consts);17168return Builder.CreateBitCast(17169Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy);17170}17171case PPC::BI__builtin_vsx_strmb: {17172Value *Op0 = EmitScalarExpr(E->getArg(0));17173Value *Op1 = EmitScalarExpr(E->getArg(1));17174Value *Op2 = EmitScalarExpr(E->getArg(2));17175int64_t NumBytes = cast<ConstantInt>(Op1)->getZExtValue();17176bool IsLE = getTarget().isLittleEndian();17177auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) {17178// Storing the whole vector, simply store it on BE and reverse bytes and17179// store on LE.17180if (Width == 16) {17181Value *StVec = Op2;17182if (IsLE) {17183SmallVector<int, 16> RevMask;17184for (int Idx = 0; Idx < 16; Idx++)17185RevMask.push_back(15 - Idx);17186StVec = Builder.CreateShuffleVector(Op2, Op2, 
RevMask);17187}17188return Builder.CreateStore(17189StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1)));17190}17191auto *ConvTy = Int64Ty;17192unsigned NumElts = 0;17193switch (Width) {17194default:17195llvm_unreachable("width for stores must be a power of 2");17196case 8:17197ConvTy = Int64Ty;17198NumElts = 2;17199break;17200case 4:17201ConvTy = Int32Ty;17202NumElts = 4;17203break;17204case 2:17205ConvTy = Int16Ty;17206NumElts = 8;17207break;17208case 1:17209ConvTy = Int8Ty;17210NumElts = 16;17211break;17212}17213Value *Vec = Builder.CreateBitCast(17214Op2, llvm::FixedVectorType::get(ConvTy, NumElts));17215Value *Ptr =17216Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset));17217Value *Elt = Builder.CreateExtractElement(Vec, EltNo);17218if (IsLE && Width > 1) {17219Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy);17220Elt = Builder.CreateCall(F, Elt);17221}17222return Builder.CreateStore(17223Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1)));17224};17225unsigned Stored = 0;17226unsigned RemainingBytes = NumBytes;17227Value *Result;17228if (NumBytes == 16)17229return StoreSubVec(16, 0, 0);17230if (NumBytes >= 8) {17231Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1);17232RemainingBytes -= 8;17233Stored += 8;17234}17235if (RemainingBytes >= 4) {17236Result = StoreSubVec(4, NumBytes - Stored - 4,17237IsLE ? (Stored >> 2) : 3 - (Stored >> 2));17238RemainingBytes -= 4;17239Stored += 4;17240}17241if (RemainingBytes >= 2) {17242Result = StoreSubVec(2, NumBytes - Stored - 2,17243IsLE ? (Stored >> 1) : 7 - (Stored >> 1));17244RemainingBytes -= 2;17245Stored += 2;17246}17247if (RemainingBytes)17248Result =17249StoreSubVec(1, NumBytes - Stored - 1, IsLE ? 
Stored : 15 - Stored);17250return Result;17251}17252// Square root17253case PPC::BI__builtin_vsx_xvsqrtsp:17254case PPC::BI__builtin_vsx_xvsqrtdp: {17255llvm::Type *ResultType = ConvertType(E->getType());17256Value *X = EmitScalarExpr(E->getArg(0));17257if (Builder.getIsFPConstrained()) {17258llvm::Function *F = CGM.getIntrinsic(17259Intrinsic::experimental_constrained_sqrt, ResultType);17260return Builder.CreateConstrainedFPCall(F, X);17261} else {17262llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);17263return Builder.CreateCall(F, X);17264}17265}17266// Count leading zeros17267case PPC::BI__builtin_altivec_vclzb:17268case PPC::BI__builtin_altivec_vclzh:17269case PPC::BI__builtin_altivec_vclzw:17270case PPC::BI__builtin_altivec_vclzd: {17271llvm::Type *ResultType = ConvertType(E->getType());17272Value *X = EmitScalarExpr(E->getArg(0));17273Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);17274Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);17275return Builder.CreateCall(F, {X, Undef});17276}17277case PPC::BI__builtin_altivec_vctzb:17278case PPC::BI__builtin_altivec_vctzh:17279case PPC::BI__builtin_altivec_vctzw:17280case PPC::BI__builtin_altivec_vctzd: {17281llvm::Type *ResultType = ConvertType(E->getType());17282Value *X = EmitScalarExpr(E->getArg(0));17283Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);17284Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);17285return Builder.CreateCall(F, {X, Undef});17286}17287case PPC::BI__builtin_altivec_vinsd:17288case PPC::BI__builtin_altivec_vinsw:17289case PPC::BI__builtin_altivec_vinsd_elt:17290case PPC::BI__builtin_altivec_vinsw_elt: {17291llvm::Type *ResultType = ConvertType(E->getType());17292Value *Op0 = EmitScalarExpr(E->getArg(0));17293Value *Op1 = EmitScalarExpr(E->getArg(1));17294Value *Op2 = EmitScalarExpr(E->getArg(2));1729517296bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||17297BuiltinID == 
PPC::BI__builtin_altivec_vinsd);1729817299bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw ||17300BuiltinID == PPC::BI__builtin_altivec_vinsw_elt);1730117302// The third argument must be a compile time constant.17303ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);17304assert(ArgCI &&17305"Third Arg to vinsw/vinsd intrinsic must be a constant integer!");1730617307// Valid value for the third argument is dependent on the input type and17308// builtin called.17309int ValidMaxValue = 0;17310if (IsUnaligned)17311ValidMaxValue = (Is32bit) ? 12 : 8;17312else17313ValidMaxValue = (Is32bit) ? 3 : 1;1731417315// Get value of third argument.17316int64_t ConstArg = ArgCI->getSExtValue();1731717318// Compose range checking error message.17319std::string RangeErrMsg = IsUnaligned ? "byte" : "element";17320RangeErrMsg += " number " + llvm::to_string(ConstArg);17321RangeErrMsg += " is outside of the valid range [0, ";17322RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";1732317324// Issue error if third argument is not within the valid range.17325if (ConstArg < 0 || ConstArg > ValidMaxValue)17326CGM.Error(E->getExprLoc(), RangeErrMsg);1732717328// Input to vec_replace_elt is an element index, convert to byte index.17329if (!IsUnaligned) {17330ConstArg *= Is32bit ? 4 : 8;17331// Fix the constant according to endianess.17332if (getTarget().isLittleEndian())17333ConstArg = (Is32bit ? 12 : 8) - ConstArg;17334}1733517336ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;17337Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);17338// Casting input to vector int as per intrinsic definition.17339Op0 =17340Is32bit17341? 
Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))17342: Builder.CreateBitCast(Op0,17343llvm::FixedVectorType::get(Int64Ty, 2));17344return Builder.CreateBitCast(17345Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);17346}17347case PPC::BI__builtin_altivec_vpopcntb:17348case PPC::BI__builtin_altivec_vpopcnth:17349case PPC::BI__builtin_altivec_vpopcntw:17350case PPC::BI__builtin_altivec_vpopcntd: {17351llvm::Type *ResultType = ConvertType(E->getType());17352Value *X = EmitScalarExpr(E->getArg(0));17353llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);17354return Builder.CreateCall(F, X);17355}17356case PPC::BI__builtin_altivec_vadduqm:17357case PPC::BI__builtin_altivec_vsubuqm: {17358Value *Op0 = EmitScalarExpr(E->getArg(0));17359Value *Op1 = EmitScalarExpr(E->getArg(1));17360llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);17361Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));17362Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));17363if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)17364return Builder.CreateAdd(Op0, Op1, "vadduqm");17365else17366return Builder.CreateSub(Op0, Op1, "vsubuqm");17367}17368case PPC::BI__builtin_altivec_vaddcuq_c:17369case PPC::BI__builtin_altivec_vsubcuq_c: {17370SmallVector<Value *, 2> Ops;17371Value *Op0 = EmitScalarExpr(E->getArg(0));17372Value *Op1 = EmitScalarExpr(E->getArg(1));17373llvm::Type *V1I128Ty = llvm::FixedVectorType::get(17374llvm::IntegerType::get(getLLVMContext(), 128), 1);17375Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));17376Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));17377ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c)17378? 
Intrinsic::ppc_altivec_vaddcuq17379: Intrinsic::ppc_altivec_vsubcuq;17380return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");17381}17382case PPC::BI__builtin_altivec_vaddeuqm_c:17383case PPC::BI__builtin_altivec_vaddecuq_c:17384case PPC::BI__builtin_altivec_vsubeuqm_c:17385case PPC::BI__builtin_altivec_vsubecuq_c: {17386SmallVector<Value *, 3> Ops;17387Value *Op0 = EmitScalarExpr(E->getArg(0));17388Value *Op1 = EmitScalarExpr(E->getArg(1));17389Value *Op2 = EmitScalarExpr(E->getArg(2));17390llvm::Type *V1I128Ty = llvm::FixedVectorType::get(17391llvm::IntegerType::get(getLLVMContext(), 128), 1);17392Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));17393Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));17394Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty));17395switch (BuiltinID) {17396default:17397llvm_unreachable("Unsupported intrinsic!");17398case PPC::BI__builtin_altivec_vaddeuqm_c:17399ID = Intrinsic::ppc_altivec_vaddeuqm;17400break;17401case PPC::BI__builtin_altivec_vaddecuq_c:17402ID = Intrinsic::ppc_altivec_vaddecuq;17403break;17404case PPC::BI__builtin_altivec_vsubeuqm_c:17405ID = Intrinsic::ppc_altivec_vsubeuqm;17406break;17407case PPC::BI__builtin_altivec_vsubecuq_c:17408ID = Intrinsic::ppc_altivec_vsubecuq;17409break;17410}17411return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, "");17412}17413case PPC::BI__builtin_ppc_rldimi:17414case PPC::BI__builtin_ppc_rlwimi: {17415Value *Op0 = EmitScalarExpr(E->getArg(0));17416Value *Op1 = EmitScalarExpr(E->getArg(1));17417Value *Op2 = EmitScalarExpr(E->getArg(2));17418Value *Op3 = EmitScalarExpr(E->getArg(3));17419// rldimi is 64-bit instruction, expand the intrinsic before isel to17420// leverage peephole and avoid legalization efforts.17421if (BuiltinID == PPC::BI__builtin_ppc_rldimi &&17422!getTarget().getTriple().isPPC64()) {17423Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType());17424Op2 = Builder.CreateZExt(Op2, Int64Ty);17425Value *Shift = Builder.CreateCall(F, {Op0, Op0, 
Op2});17426return Builder.CreateOr(Builder.CreateAnd(Shift, Op3),17427Builder.CreateAnd(Op1, Builder.CreateNot(Op3)));17428}17429return Builder.CreateCall(17430CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi17431? Intrinsic::ppc_rldimi17432: Intrinsic::ppc_rlwimi),17433{Op0, Op1, Op2, Op3});17434}17435case PPC::BI__builtin_ppc_rlwnm: {17436Value *Op0 = EmitScalarExpr(E->getArg(0));17437Value *Op1 = EmitScalarExpr(E->getArg(1));17438Value *Op2 = EmitScalarExpr(E->getArg(2));17439return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm),17440{Op0, Op1, Op2});17441}17442case PPC::BI__builtin_ppc_poppar4:17443case PPC::BI__builtin_ppc_poppar8: {17444Value *Op0 = EmitScalarExpr(E->getArg(0));17445llvm::Type *ArgType = Op0->getType();17446Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);17447Value *Tmp = Builder.CreateCall(F, Op0);1744817449llvm::Type *ResultType = ConvertType(E->getType());17450Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));17451if (Result->getType() != ResultType)17452Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,17453"cast");17454return Result;17455}17456case PPC::BI__builtin_ppc_cmpb: {17457Value *Op0 = EmitScalarExpr(E->getArg(0));17458Value *Op1 = EmitScalarExpr(E->getArg(1));17459if (getTarget().getTriple().isPPC64()) {17460Function *F =17461CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty});17462return Builder.CreateCall(F, {Op0, Op1}, "cmpb");17463}17464// For 32 bit, emit the code as below:17465// %conv = trunc i64 %a to i3217466// %conv1 = trunc i64 %b to i3217467// %shr = lshr i64 %a, 3217468// %conv2 = trunc i64 %shr to i3217469// %shr3 = lshr i64 %b, 3217470// %conv4 = trunc i64 %shr3 to i3217471// %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1)17472// %conv5 = zext i32 %0 to i6417473// %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4)17474// %conv614 = zext i32 %1 to i6417475// %shl = shl nuw i64 %conv614, 3217476// %or = or i64 
%shl, %conv517477// ret i64 %or17478Function *F =17479CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty});17480Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty);17481Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty);17482Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32);17483Value *ArgOneHi =17484Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty);17485Value *ArgTwoHi =17486Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty);17487Value *ResLo = Builder.CreateZExt(17488Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty);17489Value *ResHiShift = Builder.CreateZExt(17490Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty);17491Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt);17492return Builder.CreateOr(ResLo, ResHi);17493}17494// Copy sign17495case PPC::BI__builtin_vsx_xvcpsgnsp:17496case PPC::BI__builtin_vsx_xvcpsgndp: {17497llvm::Type *ResultType = ConvertType(E->getType());17498Value *X = EmitScalarExpr(E->getArg(0));17499Value *Y = EmitScalarExpr(E->getArg(1));17500ID = Intrinsic::copysign;17501llvm::Function *F = CGM.getIntrinsic(ID, ResultType);17502return Builder.CreateCall(F, {X, Y});17503}17504// Rounding/truncation17505case PPC::BI__builtin_vsx_xvrspip:17506case PPC::BI__builtin_vsx_xvrdpip:17507case PPC::BI__builtin_vsx_xvrdpim:17508case PPC::BI__builtin_vsx_xvrspim:17509case PPC::BI__builtin_vsx_xvrdpi:17510case PPC::BI__builtin_vsx_xvrspi:17511case PPC::BI__builtin_vsx_xvrdpic:17512case PPC::BI__builtin_vsx_xvrspic:17513case PPC::BI__builtin_vsx_xvrdpiz:17514case PPC::BI__builtin_vsx_xvrspiz: {17515llvm::Type *ResultType = ConvertType(E->getType());17516Value *X = EmitScalarExpr(E->getArg(0));17517if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||17518BuiltinID == PPC::BI__builtin_vsx_xvrspim)17519ID = Builder.getIsFPConstrained()17520? 
Intrinsic::experimental_constrained_floor17521: Intrinsic::floor;17522else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||17523BuiltinID == PPC::BI__builtin_vsx_xvrspi)17524ID = Builder.getIsFPConstrained()17525? Intrinsic::experimental_constrained_round17526: Intrinsic::round;17527else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||17528BuiltinID == PPC::BI__builtin_vsx_xvrspic)17529ID = Builder.getIsFPConstrained()17530? Intrinsic::experimental_constrained_rint17531: Intrinsic::rint;17532else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||17533BuiltinID == PPC::BI__builtin_vsx_xvrspip)17534ID = Builder.getIsFPConstrained()17535? Intrinsic::experimental_constrained_ceil17536: Intrinsic::ceil;17537else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||17538BuiltinID == PPC::BI__builtin_vsx_xvrspiz)17539ID = Builder.getIsFPConstrained()17540? Intrinsic::experimental_constrained_trunc17541: Intrinsic::trunc;17542llvm::Function *F = CGM.getIntrinsic(ID, ResultType);17543return Builder.getIsFPConstrained() ? 
Builder.CreateConstrainedFPCall(F, X)17544: Builder.CreateCall(F, X);17545}1754617547// Absolute value17548case PPC::BI__builtin_vsx_xvabsdp:17549case PPC::BI__builtin_vsx_xvabssp: {17550llvm::Type *ResultType = ConvertType(E->getType());17551Value *X = EmitScalarExpr(E->getArg(0));17552llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);17553return Builder.CreateCall(F, X);17554}1755517556// Fastmath by default17557case PPC::BI__builtin_ppc_recipdivf:17558case PPC::BI__builtin_ppc_recipdivd:17559case PPC::BI__builtin_ppc_rsqrtf:17560case PPC::BI__builtin_ppc_rsqrtd: {17561FastMathFlags FMF = Builder.getFastMathFlags();17562Builder.getFastMathFlags().setFast();17563llvm::Type *ResultType = ConvertType(E->getType());17564Value *X = EmitScalarExpr(E->getArg(0));1756517566if (BuiltinID == PPC::BI__builtin_ppc_recipdivf ||17567BuiltinID == PPC::BI__builtin_ppc_recipdivd) {17568Value *Y = EmitScalarExpr(E->getArg(1));17569Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv");17570Builder.getFastMathFlags() &= (FMF);17571return FDiv;17572}17573auto *One = ConstantFP::get(ResultType, 1.0);17574llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);17575Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt");17576Builder.getFastMathFlags() &= (FMF);17577return FDiv;17578}17579case PPC::BI__builtin_ppc_alignx: {17580Value *Op0 = EmitScalarExpr(E->getArg(0));17581Value *Op1 = EmitScalarExpr(E->getArg(1));17582ConstantInt *AlignmentCI = cast<ConstantInt>(Op0);17583if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment))17584AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(),17585llvm::Value::MaximumAlignment);1758617587emitAlignmentAssumption(Op1, E->getArg(1),17588/*The expr loc is sufficient.*/ SourceLocation(),17589AlignmentCI, nullptr);17590return Op1;17591}17592case PPC::BI__builtin_ppc_rdlam: {17593Value *Op0 = EmitScalarExpr(E->getArg(0));17594Value *Op1 = EmitScalarExpr(E->getArg(1));17595Value *Op2 = 
EmitScalarExpr(E->getArg(2));17596llvm::Type *Ty = Op0->getType();17597Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false);17598Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty);17599Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt});17600return Builder.CreateAnd(Rotate, Op2);17601}17602case PPC::BI__builtin_ppc_load2r: {17603Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r);17604Value *Op0 = EmitScalarExpr(E->getArg(0));17605Value *LoadIntrinsic = Builder.CreateCall(F, {Op0});17606return Builder.CreateTrunc(LoadIntrinsic, Int16Ty);17607}17608// FMA variations17609case PPC::BI__builtin_ppc_fnmsub:17610case PPC::BI__builtin_ppc_fnmsubs:17611case PPC::BI__builtin_vsx_xvmaddadp:17612case PPC::BI__builtin_vsx_xvmaddasp:17613case PPC::BI__builtin_vsx_xvnmaddadp:17614case PPC::BI__builtin_vsx_xvnmaddasp:17615case PPC::BI__builtin_vsx_xvmsubadp:17616case PPC::BI__builtin_vsx_xvmsubasp:17617case PPC::BI__builtin_vsx_xvnmsubadp:17618case PPC::BI__builtin_vsx_xvnmsubasp: {17619llvm::Type *ResultType = ConvertType(E->getType());17620Value *X = EmitScalarExpr(E->getArg(0));17621Value *Y = EmitScalarExpr(E->getArg(1));17622Value *Z = EmitScalarExpr(E->getArg(2));17623llvm::Function *F;17624if (Builder.getIsFPConstrained())17625F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);17626else17627F = CGM.getIntrinsic(Intrinsic::fma, ResultType);17628switch (BuiltinID) {17629case PPC::BI__builtin_vsx_xvmaddadp:17630case PPC::BI__builtin_vsx_xvmaddasp:17631if (Builder.getIsFPConstrained())17632return Builder.CreateConstrainedFPCall(F, {X, Y, Z});17633else17634return Builder.CreateCall(F, {X, Y, Z});17635case PPC::BI__builtin_vsx_xvnmaddadp:17636case PPC::BI__builtin_vsx_xvnmaddasp:17637if (Builder.getIsFPConstrained())17638return Builder.CreateFNeg(17639Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");17640else17641return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");17642case PPC::BI__builtin_vsx_xvmsubadp:17643case 
PPC::BI__builtin_vsx_xvmsubasp:17644if (Builder.getIsFPConstrained())17645return Builder.CreateConstrainedFPCall(17646F, {X, Y, Builder.CreateFNeg(Z, "neg")});17647else17648return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});17649case PPC::BI__builtin_ppc_fnmsub:17650case PPC::BI__builtin_ppc_fnmsubs:17651case PPC::BI__builtin_vsx_xvnmsubadp:17652case PPC::BI__builtin_vsx_xvnmsubasp:17653if (Builder.getIsFPConstrained())17654return Builder.CreateFNeg(17655Builder.CreateConstrainedFPCall(17656F, {X, Y, Builder.CreateFNeg(Z, "neg")}),17657"neg");17658else17659return Builder.CreateCall(17660CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z});17661}17662llvm_unreachable("Unknown FMA operation");17663return nullptr; // Suppress no-return warning17664}1766517666case PPC::BI__builtin_vsx_insertword: {17667Value *Op0 = EmitScalarExpr(E->getArg(0));17668Value *Op1 = EmitScalarExpr(E->getArg(1));17669Value *Op2 = EmitScalarExpr(E->getArg(2));17670llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);1767117672// Third argument is a compile time constant int. It must be clamped to17673// to the range [0, 12].17674ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);17675assert(ArgCI &&17676"Third arg to xxinsertw intrinsic must be constant integer");17677const int64_t MaxIndex = 12;17678int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);1767917680// The builtin semantics don't exactly match the xxinsertw instructions17681// semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the17682// word from the first argument, and inserts it in the second argument. 
The17683// instruction extracts the word from its second input register and inserts17684// it into its first input register, so swap the first and second arguments.17685std::swap(Op0, Op1);1768617687// Need to cast the second argument from a vector of unsigned int to a17688// vector of long long.17689Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));1769017691if (getTarget().isLittleEndian()) {17692// Reverse the double words in the vector we will extract from.17693Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));17694Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef<int>{1, 0});1769517696// Reverse the index.17697Index = MaxIndex - Index;17698}1769917700// Intrinsic expects the first arg to be a vector of int.17701Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));17702Op2 = ConstantInt::getSigned(Int32Ty, Index);17703return Builder.CreateCall(F, {Op0, Op1, Op2});17704}1770517706case PPC::BI__builtin_vsx_extractuword: {17707Value *Op0 = EmitScalarExpr(E->getArg(0));17708Value *Op1 = EmitScalarExpr(E->getArg(1));17709llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);1771017711// Intrinsic expects the first argument to be a vector of doublewords.17712Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));1771317714// The second argument is a compile time constant int that needs to17715// be clamped to the range [0, 12].17716ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op1);17717assert(ArgCI &&17718"Second Arg to xxextractuw intrinsic must be a constant integer!");17719const int64_t MaxIndex = 12;17720int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex);1772117722if (getTarget().isLittleEndian()) {17723// Reverse the index.17724Index = MaxIndex - Index;17725Op1 = ConstantInt::getSigned(Int32Ty, Index);1772617727// Emit the call, then reverse the double words of the results vector.17728Value *Call = Builder.CreateCall(F, {Op0, 
Op1});1772917730Value *ShuffleCall =17731Builder.CreateShuffleVector(Call, Call, ArrayRef<int>{1, 0});17732return ShuffleCall;17733} else {17734Op1 = ConstantInt::getSigned(Int32Ty, Index);17735return Builder.CreateCall(F, {Op0, Op1});17736}17737}1773817739case PPC::BI__builtin_vsx_xxpermdi: {17740Value *Op0 = EmitScalarExpr(E->getArg(0));17741Value *Op1 = EmitScalarExpr(E->getArg(1));17742Value *Op2 = EmitScalarExpr(E->getArg(2));17743ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);17744assert(ArgCI && "Third arg must be constant integer!");1774517746unsigned Index = ArgCI->getZExtValue();17747Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2));17748Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2));1774917750// Account for endianness by treating this as just a shuffle. So we use the17751// same indices for both LE and BE in order to produce expected results in17752// both cases.17753int ElemIdx0 = (Index & 2) >> 1;17754int ElemIdx1 = 2 + (Index & 1);1775517756int ShuffleElts[2] = {ElemIdx0, ElemIdx1};17757Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);17758QualType BIRetType = E->getType();17759auto RetTy = ConvertType(BIRetType);17760return Builder.CreateBitCast(ShuffleCall, RetTy);17761}1776217763case PPC::BI__builtin_vsx_xxsldwi: {17764Value *Op0 = EmitScalarExpr(E->getArg(0));17765Value *Op1 = EmitScalarExpr(E->getArg(1));17766Value *Op2 = EmitScalarExpr(E->getArg(2));17767ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);17768assert(ArgCI && "Third argument must be a compile time constant");17769unsigned Index = ArgCI->getZExtValue() & 0x3;17770Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4));17771Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4));1777217773// Create a shuffle mask17774int ElemIdx0;17775int ElemIdx1;17776int ElemIdx2;17777int ElemIdx3;17778if (getTarget().isLittleEndian()) {17779// Little endian element N comes from element 
8+N-Index of the17780// concatenated wide vector (of course, using modulo arithmetic on17781// the total number of elements).17782ElemIdx0 = (8 - Index) % 8;17783ElemIdx1 = (9 - Index) % 8;17784ElemIdx2 = (10 - Index) % 8;17785ElemIdx3 = (11 - Index) % 8;17786} else {17787// Big endian ElemIdx<N> = Index + N17788ElemIdx0 = Index;17789ElemIdx1 = Index + 1;17790ElemIdx2 = Index + 2;17791ElemIdx3 = Index + 3;17792}1779317794int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3};17795Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts);17796QualType BIRetType = E->getType();17797auto RetTy = ConvertType(BIRetType);17798return Builder.CreateBitCast(ShuffleCall, RetTy);17799}1780017801case PPC::BI__builtin_pack_vector_int128: {17802Value *Op0 = EmitScalarExpr(E->getArg(0));17803Value *Op1 = EmitScalarExpr(E->getArg(1));17804bool isLittleEndian = getTarget().isLittleEndian();17805Value *PoisonValue =17806llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2));17807Value *Res = Builder.CreateInsertElement(17808PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0));17809Res = Builder.CreateInsertElement(Res, Op1,17810(uint64_t)(isLittleEndian ? 
0 : 1));17811return Builder.CreateBitCast(Res, ConvertType(E->getType()));17812}1781317814case PPC::BI__builtin_unpack_vector_int128: {17815Value *Op0 = EmitScalarExpr(E->getArg(0));17816Value *Op1 = EmitScalarExpr(E->getArg(1));17817ConstantInt *Index = cast<ConstantInt>(Op1);17818Value *Unpacked = Builder.CreateBitCast(17819Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2));1782017821if (getTarget().isLittleEndian())17822Index =17823ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue());1782417825return Builder.CreateExtractElement(Unpacked, Index);17826}1782717828case PPC::BI__builtin_ppc_sthcx: {17829llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx);17830Value *Op0 = EmitScalarExpr(E->getArg(0));17831Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty);17832return Builder.CreateCall(F, {Op0, Op1});17833}1783417835// The PPC MMA builtins take a pointer to a __vector_quad as an argument.17836// Some of the MMA instructions accumulate their result into an existing17837// accumulator whereas the others generate a new accumulator. So we need to17838// use custom code generation to expand a builtin call with a pointer to a17839// load (if the corresponding instruction accumulates its result) followed by17840// the call to the intrinsic and a store of the result.17841#define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \17842case PPC::BI__builtin_##Name:17843#include "clang/Basic/BuiltinsPPC.def"17844{17845SmallVector<Value *, 4> Ops;17846for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)17847if (E->getArg(i)->getType()->isArrayType())17848Ops.push_back(17849EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this));17850else17851Ops.push_back(EmitScalarExpr(E->getArg(i)));17852// The first argument of these two builtins is a pointer used to store their17853// result. However, the llvm intrinsics return their result in multiple17854// return values. 
So, here we emit code extracting these values from the17855// intrinsic results and storing them using that pointer.17856if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc ||17857BuiltinID == PPC::BI__builtin_vsx_disassemble_pair ||17858BuiltinID == PPC::BI__builtin_mma_disassemble_pair) {17859unsigned NumVecs = 2;17860auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair;17861if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) {17862NumVecs = 4;17863Intrinsic = Intrinsic::ppc_mma_disassemble_acc;17864}17865llvm::Function *F = CGM.getIntrinsic(Intrinsic);17866Address Addr = EmitPointerWithAlignment(E->getArg(1));17867Value *Vec = Builder.CreateLoad(Addr);17868Value *Call = Builder.CreateCall(F, {Vec});17869llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16);17870Value *Ptr = Ops[0];17871for (unsigned i=0; i<NumVecs; i++) {17872Value *Vec = Builder.CreateExtractValue(Call, i);17873llvm::ConstantInt* Index = llvm::ConstantInt::get(IntTy, i);17874Value *GEP = Builder.CreateInBoundsGEP(VTy, Ptr, Index);17875Builder.CreateAlignedStore(Vec, GEP, MaybeAlign(16));17876}17877return Call;17878}17879if (BuiltinID == PPC::BI__builtin_vsx_build_pair ||17880BuiltinID == PPC::BI__builtin_mma_build_acc) {17881// Reverse the order of the operands for LE, so the17882// same builtin call can be used on both LE and BE17883// without the need for the programmer to swap operands.17884// The operands are reversed starting from the second argument,17885// the first operand is the pointer to the pair/accumulator17886// that is being built.17887if (getTarget().isLittleEndian())17888std::reverse(Ops.begin() + 1, Ops.end());17889}17890bool Accumulate;17891switch (BuiltinID) {17892#define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \17893case PPC::BI__builtin_##Name: \17894ID = Intrinsic::ppc_##Intr; \17895Accumulate = Acc; \17896break;17897#include "clang/Basic/BuiltinsPPC.def"17898}17899if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||17900BuiltinID == PPC::BI__builtin_vsx_stxvp 
||17901BuiltinID == PPC::BI__builtin_mma_lxvp ||17902BuiltinID == PPC::BI__builtin_mma_stxvp) {17903if (BuiltinID == PPC::BI__builtin_vsx_lxvp ||17904BuiltinID == PPC::BI__builtin_mma_lxvp) {17905Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]);17906} else {17907Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]);17908}17909Ops.pop_back();17910llvm::Function *F = CGM.getIntrinsic(ID);17911return Builder.CreateCall(F, Ops, "");17912}17913SmallVector<Value*, 4> CallOps;17914if (Accumulate) {17915Address Addr = EmitPointerWithAlignment(E->getArg(0));17916Value *Acc = Builder.CreateLoad(Addr);17917CallOps.push_back(Acc);17918}17919for (unsigned i=1; i<Ops.size(); i++)17920CallOps.push_back(Ops[i]);17921llvm::Function *F = CGM.getIntrinsic(ID);17922Value *Call = Builder.CreateCall(F, CallOps);17923return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64));17924}1792517926case PPC::BI__builtin_ppc_compare_and_swap:17927case PPC::BI__builtin_ppc_compare_and_swaplp: {17928Address Addr = EmitPointerWithAlignment(E->getArg(0));17929Address OldValAddr = EmitPointerWithAlignment(E->getArg(1));17930Value *OldVal = Builder.CreateLoad(OldValAddr);17931QualType AtomicTy = E->getArg(0)->getType()->getPointeeType();17932LValue LV = MakeAddrLValue(Addr, AtomicTy);17933Value *Op2 = EmitScalarExpr(E->getArg(2));17934auto Pair = EmitAtomicCompareExchange(17935LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(),17936llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true);17937// Unlike c11's atomic_compare_exchange, according to17938// https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp17939// > In either case, the contents of the memory location specified by addr17940// > are copied into the memory location specified by old_val_addr.17941// But it hasn't specified storing to OldValAddr is atomic or not and17942// which order to use. 
Now following XL's codegen, treat it as a normal17943// store.17944Value *LoadedVal = Pair.first.getScalarVal();17945Builder.CreateStore(LoadedVal, OldValAddr);17946return Builder.CreateZExt(Pair.second, Builder.getInt32Ty());17947}17948case PPC::BI__builtin_ppc_fetch_and_add:17949case PPC::BI__builtin_ppc_fetch_and_addlp: {17950return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,17951llvm::AtomicOrdering::Monotonic);17952}17953case PPC::BI__builtin_ppc_fetch_and_and:17954case PPC::BI__builtin_ppc_fetch_and_andlp: {17955return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,17956llvm::AtomicOrdering::Monotonic);17957}1795817959case PPC::BI__builtin_ppc_fetch_and_or:17960case PPC::BI__builtin_ppc_fetch_and_orlp: {17961return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,17962llvm::AtomicOrdering::Monotonic);17963}17964case PPC::BI__builtin_ppc_fetch_and_swap:17965case PPC::BI__builtin_ppc_fetch_and_swaplp: {17966return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,17967llvm::AtomicOrdering::Monotonic);17968}17969case PPC::BI__builtin_ppc_ldarx:17970case PPC::BI__builtin_ppc_lwarx:17971case PPC::BI__builtin_ppc_lharx:17972case PPC::BI__builtin_ppc_lbarx:17973return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E);17974case PPC::BI__builtin_ppc_mfspr: {17975Value *Op0 = EmitScalarExpr(E->getArg(0));17976llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 3217977? Int32Ty17978: Int64Ty;17979Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType);17980return Builder.CreateCall(F, {Op0});17981}17982case PPC::BI__builtin_ppc_mtspr: {17983Value *Op0 = EmitScalarExpr(E->getArg(0));17984Value *Op1 = EmitScalarExpr(E->getArg(1));17985llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 3217986? 
Int32Ty17987: Int64Ty;17988Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType);17989return Builder.CreateCall(F, {Op0, Op1});17990}17991case PPC::BI__builtin_ppc_popcntb: {17992Value *ArgValue = EmitScalarExpr(E->getArg(0));17993llvm::Type *ArgType = ArgValue->getType();17994Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType});17995return Builder.CreateCall(F, {ArgValue}, "popcntb");17996}17997case PPC::BI__builtin_ppc_mtfsf: {17998// The builtin takes a uint32 that needs to be cast to an17999// f64 to be passed to the intrinsic.18000Value *Op0 = EmitScalarExpr(E->getArg(0));18001Value *Op1 = EmitScalarExpr(E->getArg(1));18002Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy);18003llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf);18004return Builder.CreateCall(F, {Op0, Cast}, "");18005}1800618007case PPC::BI__builtin_ppc_swdiv_nochk:18008case PPC::BI__builtin_ppc_swdivs_nochk: {18009Value *Op0 = EmitScalarExpr(E->getArg(0));18010Value *Op1 = EmitScalarExpr(E->getArg(1));18011FastMathFlags FMF = Builder.getFastMathFlags();18012Builder.getFastMathFlags().setFast();18013Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk");18014Builder.getFastMathFlags() &= (FMF);18015return FDiv;18016}18017case PPC::BI__builtin_ppc_fric:18018return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(18019*this, E, Intrinsic::rint,18020Intrinsic::experimental_constrained_rint))18021.getScalarVal();18022case PPC::BI__builtin_ppc_frim:18023case PPC::BI__builtin_ppc_frims:18024return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(18025*this, E, Intrinsic::floor,18026Intrinsic::experimental_constrained_floor))18027.getScalarVal();18028case PPC::BI__builtin_ppc_frin:18029case PPC::BI__builtin_ppc_frins:18030return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(18031*this, E, Intrinsic::round,18032Intrinsic::experimental_constrained_round))18033.getScalarVal();18034case PPC::BI__builtin_ppc_frip:18035case PPC::BI__builtin_ppc_frips:18036return 
RValue::get(emitUnaryMaybeConstrainedFPBuiltin(18037*this, E, Intrinsic::ceil,18038Intrinsic::experimental_constrained_ceil))18039.getScalarVal();18040case PPC::BI__builtin_ppc_friz:18041case PPC::BI__builtin_ppc_frizs:18042return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(18043*this, E, Intrinsic::trunc,18044Intrinsic::experimental_constrained_trunc))18045.getScalarVal();18046case PPC::BI__builtin_ppc_fsqrt:18047case PPC::BI__builtin_ppc_fsqrts:18048return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(18049*this, E, Intrinsic::sqrt,18050Intrinsic::experimental_constrained_sqrt))18051.getScalarVal();18052case PPC::BI__builtin_ppc_test_data_class: {18053Value *Op0 = EmitScalarExpr(E->getArg(0));18054Value *Op1 = EmitScalarExpr(E->getArg(1));18055return Builder.CreateCall(18056CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()),18057{Op0, Op1}, "test_data_class");18058}18059case PPC::BI__builtin_ppc_maxfe: {18060Value *Op0 = EmitScalarExpr(E->getArg(0));18061Value *Op1 = EmitScalarExpr(E->getArg(1));18062Value *Op2 = EmitScalarExpr(E->getArg(2));18063Value *Op3 = EmitScalarExpr(E->getArg(3));18064return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe),18065{Op0, Op1, Op2, Op3});18066}18067case PPC::BI__builtin_ppc_maxfl: {18068Value *Op0 = EmitScalarExpr(E->getArg(0));18069Value *Op1 = EmitScalarExpr(E->getArg(1));18070Value *Op2 = EmitScalarExpr(E->getArg(2));18071Value *Op3 = EmitScalarExpr(E->getArg(3));18072return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl),18073{Op0, Op1, Op2, Op3});18074}18075case PPC::BI__builtin_ppc_maxfs: {18076Value *Op0 = EmitScalarExpr(E->getArg(0));18077Value *Op1 = EmitScalarExpr(E->getArg(1));18078Value *Op2 = EmitScalarExpr(E->getArg(2));18079Value *Op3 = EmitScalarExpr(E->getArg(3));18080return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs),18081{Op0, Op1, Op2, Op3});18082}18083case PPC::BI__builtin_ppc_minfe: {18084Value *Op0 = EmitScalarExpr(E->getArg(0));18085Value *Op1 = 
EmitScalarExpr(E->getArg(1));18086Value *Op2 = EmitScalarExpr(E->getArg(2));18087Value *Op3 = EmitScalarExpr(E->getArg(3));18088return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe),18089{Op0, Op1, Op2, Op3});18090}18091case PPC::BI__builtin_ppc_minfl: {18092Value *Op0 = EmitScalarExpr(E->getArg(0));18093Value *Op1 = EmitScalarExpr(E->getArg(1));18094Value *Op2 = EmitScalarExpr(E->getArg(2));18095Value *Op3 = EmitScalarExpr(E->getArg(3));18096return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl),18097{Op0, Op1, Op2, Op3});18098}18099case PPC::BI__builtin_ppc_minfs: {18100Value *Op0 = EmitScalarExpr(E->getArg(0));18101Value *Op1 = EmitScalarExpr(E->getArg(1));18102Value *Op2 = EmitScalarExpr(E->getArg(2));18103Value *Op3 = EmitScalarExpr(E->getArg(3));18104return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs),18105{Op0, Op1, Op2, Op3});18106}18107case PPC::BI__builtin_ppc_swdiv:18108case PPC::BI__builtin_ppc_swdivs: {18109Value *Op0 = EmitScalarExpr(E->getArg(0));18110Value *Op1 = EmitScalarExpr(E->getArg(1));18111return Builder.CreateFDiv(Op0, Op1, "swdiv");18112}18113case PPC::BI__builtin_ppc_set_fpscr_rn:18114return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd),18115{EmitScalarExpr(E->getArg(0))});18116case PPC::BI__builtin_ppc_mffs:18117return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm));18118}18119}1812018121namespace {18122// If \p E is not null pointer, insert address space cast to match return18123// type of \p E if necessary.18124Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF,18125const CallExpr *E = nullptr) {18126auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr);18127auto *Call = CGF.Builder.CreateCall(F);18128Call->addRetAttr(18129Attribute::getWithDereferenceableBytes(Call->getContext(), 64));18130Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4)));18131if (!E)18132return Call;18133QualType BuiltinRetType = E->getType();18134auto *RetTy = 
cast<llvm::PointerType>(CGF.ConvertType(BuiltinRetType));18135if (RetTy == Call->getType())18136return Call;18137return CGF.Builder.CreateAddrSpaceCast(Call, RetTy);18138}1813918140Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {18141auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr);18142auto *Call = CGF.Builder.CreateCall(F);18143Call->addRetAttr(18144Attribute::getWithDereferenceableBytes(Call->getContext(), 256));18145Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8)));18146return Call;18147}1814818149// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.18150/// Emit code based on Code Object ABI version.18151/// COV_4 : Emit code to use dispatch ptr18152/// COV_5+ : Emit code to use implicitarg ptr18153/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"18154/// and use its value for COV_4 or COV_5+ approach. It is used for18155/// compiling device libraries in an ABI-agnostic way.18156///18157/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by18158/// clang during compilation of user code.18159Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {18160llvm::LoadInst *LD;1816118162auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;1816318164if (Cov == CodeObjectVersionKind::COV_None) {18165StringRef Name = "__oclc_ABI_version";18166auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);18167if (!ABIVersionC)18168ABIVersionC = new llvm::GlobalVariable(18169CGF.CGM.getModule(), CGF.Int32Ty, false,18170llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr,18171llvm::GlobalVariable::NotThreadLocal,18172CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));1817318174// This load will be eliminated by the IPSCCP because it is constant18175// weak_odr without externally_initialized. 
Either changing it to weak or18176// adding externally_initialized will keep the load.18177Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC,18178CGF.CGM.getIntAlign());1817918180Value *IsCOV5 = CGF.Builder.CreateICmpSGE(18181ABIVersion,18182llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));1818318184// Indexing the implicit kernarg segment.18185Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(18186CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);1818718188// Indexing the HSA kernel_dispatch_packet struct.18189Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32(18190CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);1819118192auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP);18193LD = CGF.Builder.CreateLoad(18194Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));18195} else {18196Value *GEP = nullptr;18197if (Cov >= CodeObjectVersionKind::COV_5) {18198// Indexing the implicit kernarg segment.18199GEP = CGF.Builder.CreateConstGEP1_32(18200CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);18201} else {18202// Indexing the HSA kernel_dispatch_packet struct.18203GEP = CGF.Builder.CreateConstGEP1_32(18204CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2);18205}18206LD = CGF.Builder.CreateLoad(18207Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2)));18208}1820918210llvm::MDBuilder MDHelper(CGF.getLLVMContext());18211llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1),18212APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1));18213LD->setMetadata(llvm::LLVMContext::MD_range, RNode);18214LD->setMetadata(llvm::LLVMContext::MD_noundef,18215llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));18216LD->setMetadata(llvm::LLVMContext::MD_invariant_load,18217llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));18218return LD;18219}1822018221// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.18222Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, 
unsigned Index) {18223const unsigned XOffset = 12;18224auto *DP = EmitAMDGPUDispatchPtr(CGF);18225// Indexing the HSA kernel_dispatch_packet struct.18226auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4);18227auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset);18228auto *LD = CGF.Builder.CreateLoad(18229Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4)));18230LD->setMetadata(llvm::LLVMContext::MD_invariant_load,18231llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt));18232return LD;18233}18234} // namespace1823518236// For processing memory ordering and memory scope arguments of various18237// amdgcn builtins.18238// \p Order takes a C++11 comptabile memory-ordering specifier and converts18239// it into LLVM's memory ordering specifier using atomic C ABI, and writes18240// to \p AO. \p Scope takes a const char * and converts it into AMDGCN18241// specific SyncScopeID and writes it to \p SSID.18242void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,18243llvm::AtomicOrdering &AO,18244llvm::SyncScope::ID &SSID) {18245int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();1824618247// Map C11/C++11 memory ordering to LLVM memory ordering18248assert(llvm::isValidAtomicOrderingCABI(ord));18249switch (static_cast<llvm::AtomicOrderingCABI>(ord)) {18250case llvm::AtomicOrderingCABI::acquire:18251case llvm::AtomicOrderingCABI::consume:18252AO = llvm::AtomicOrdering::Acquire;18253break;18254case llvm::AtomicOrderingCABI::release:18255AO = llvm::AtomicOrdering::Release;18256break;18257case llvm::AtomicOrderingCABI::acq_rel:18258AO = llvm::AtomicOrdering::AcquireRelease;18259break;18260case llvm::AtomicOrderingCABI::seq_cst:18261AO = llvm::AtomicOrdering::SequentiallyConsistent;18262break;18263case llvm::AtomicOrderingCABI::relaxed:18264AO = llvm::AtomicOrdering::Monotonic;18265break;18266}1826718268// Some of the atomic builtins take the scope as a string name.18269StringRef scp;18270if (llvm::getConstantStringInfo(Scope, 
scp)) {18271SSID = getLLVMContext().getOrInsertSyncScopeID(scp);18272return;18273}1827418275// Older builtins had an enum argument for the memory scope.18276int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();18277switch (scope) {18278case 0: // __MEMORY_SCOPE_SYSTEM18279SSID = llvm::SyncScope::System;18280break;18281case 1: // __MEMORY_SCOPE_DEVICE18282SSID = getLLVMContext().getOrInsertSyncScopeID("agent");18283break;18284case 2: // __MEMORY_SCOPE_WRKGRP18285SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");18286break;18287case 3: // __MEMORY_SCOPE_WVFRNT18288SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");18289break;18290case 4: // __MEMORY_SCOPE_SINGLE18291SSID = llvm::SyncScope::SingleThread;18292break;18293default:18294SSID = llvm::SyncScope::System;18295break;18296}18297}1829818299llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,18300unsigned Idx,18301const CallExpr *E) {18302llvm::Value *Arg = nullptr;18303if ((ICEArguments & (1 << Idx)) == 0) {18304Arg = EmitScalarExpr(E->getArg(Idx));18305} else {18306// If this is required to be a constant, constant fold it so that we18307// know that the generated intrinsic gets a ConstantInt.18308std::optional<llvm::APSInt> Result =18309E->getArg(Idx)->getIntegerConstantExpr(getContext());18310assert(Result && "Expected argument to be a constant");18311Arg = llvm::ConstantInt::get(getLLVMContext(), *Result);18312}18313return Arg;18314}1831518316Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) {18317if (QT->hasFloatingRepresentation()) {18318switch (elementCount) {18319case 2:18320return Intrinsic::dx_dot2;18321case 3:18322return Intrinsic::dx_dot3;18323case 4:18324return Intrinsic::dx_dot4;18325}18326}18327if (QT->hasSignedIntegerRepresentation())18328return Intrinsic::dx_sdot;1832918330assert(QT->hasUnsignedIntegerRepresentation());18331return Intrinsic::dx_udot;18332}1833318334Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned 
BuiltinID,18335const CallExpr *E) {18336if (!getLangOpts().HLSL)18337return nullptr;1833818339switch (BuiltinID) {18340case Builtin::BI__builtin_hlsl_elementwise_all: {18341Value *Op0 = EmitScalarExpr(E->getArg(0));18342return Builder.CreateIntrinsic(18343/*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),18344CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,18345"hlsl.all");18346}18347case Builtin::BI__builtin_hlsl_elementwise_any: {18348Value *Op0 = EmitScalarExpr(E->getArg(0));18349return Builder.CreateIntrinsic(18350/*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),18351CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef<Value *>{Op0}, nullptr,18352"hlsl.any");18353}18354case Builtin::BI__builtin_hlsl_elementwise_clamp: {18355Value *OpX = EmitScalarExpr(E->getArg(0));18356Value *OpMin = EmitScalarExpr(E->getArg(1));18357Value *OpMax = EmitScalarExpr(E->getArg(2));1835818359QualType Ty = E->getArg(0)->getType();18360bool IsUnsigned = false;18361if (auto *VecTy = Ty->getAs<VectorType>())18362Ty = VecTy->getElementType();18363IsUnsigned = Ty->isUnsignedIntegerType();18364return Builder.CreateIntrinsic(18365/*ReturnType=*/OpX->getType(),18366IsUnsigned ? 
Intrinsic::dx_uclamp : Intrinsic::dx_clamp,18367ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");18368}18369case Builtin::BI__builtin_hlsl_dot: {18370Value *Op0 = EmitScalarExpr(E->getArg(0));18371Value *Op1 = EmitScalarExpr(E->getArg(1));18372llvm::Type *T0 = Op0->getType();18373llvm::Type *T1 = Op1->getType();18374if (!T0->isVectorTy() && !T1->isVectorTy()) {18375if (T0->isFloatingPointTy())18376return Builder.CreateFMul(Op0, Op1, "dx.dot");1837718378if (T0->isIntegerTy())18379return Builder.CreateMul(Op0, Op1, "dx.dot");1838018381// Bools should have been promoted18382llvm_unreachable(18383"Scalar dot product is only supported on ints and floats.");18384}18385// A VectorSplat should have happened18386assert(T0->isVectorTy() && T1->isVectorTy() &&18387"Dot product of vector and scalar is not supported.");1838818389// A vector sext or sitofp should have happened18390assert(T0->getScalarType() == T1->getScalarType() &&18391"Dot product of vectors need the same element types.");1839218393auto *VecTy0 = E->getArg(0)->getType()->getAs<VectorType>();18394[[maybe_unused]] auto *VecTy1 =18395E->getArg(1)->getType()->getAs<VectorType>();18396// A HLSLVectorTruncation should have happend18397assert(VecTy0->getNumElements() == VecTy1->getNumElements() &&18398"Dot product requires vectors to be of the same size.");1839918400return Builder.CreateIntrinsic(18401/*ReturnType=*/T0->getScalarType(),18402getDotProductIntrinsic(E->getArg(0)->getType(),18403VecTy0->getNumElements()),18404ArrayRef<Value *>{Op0, Op1}, nullptr, "dx.dot");18405} break;18406case Builtin::BI__builtin_hlsl_lerp: {18407Value *X = EmitScalarExpr(E->getArg(0));18408Value *Y = EmitScalarExpr(E->getArg(1));18409Value *S = EmitScalarExpr(E->getArg(2));18410if (!E->getArg(0)->getType()->hasFloatingRepresentation())18411llvm_unreachable("lerp operand must have a float representation");18412return Builder.CreateIntrinsic(18413/*ReturnType=*/X->getType(), 
CGM.getHLSLRuntime().getLerpIntrinsic(),18414ArrayRef<Value *>{X, Y, S}, nullptr, "hlsl.lerp");18415}18416case Builtin::BI__builtin_hlsl_elementwise_frac: {18417Value *Op0 = EmitScalarExpr(E->getArg(0));18418if (!E->getArg(0)->getType()->hasFloatingRepresentation())18419llvm_unreachable("frac operand must have a float representation");18420return Builder.CreateIntrinsic(18421/*ReturnType=*/Op0->getType(), Intrinsic::dx_frac,18422ArrayRef<Value *>{Op0}, nullptr, "dx.frac");18423}18424case Builtin::BI__builtin_hlsl_elementwise_isinf: {18425Value *Op0 = EmitScalarExpr(E->getArg(0));18426llvm::Type *Xty = Op0->getType();18427llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext());18428if (Xty->isVectorTy()) {18429auto *XVecTy = E->getArg(0)->getType()->getAs<VectorType>();18430retType = llvm::VectorType::get(18431retType, ElementCount::getFixed(XVecTy->getNumElements()));18432}18433if (!E->getArg(0)->getType()->hasFloatingRepresentation())18434llvm_unreachable("isinf operand must have a float representation");18435return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf,18436ArrayRef<Value *>{Op0}, nullptr, "dx.isinf");18437}18438case Builtin::BI__builtin_hlsl_mad: {18439Value *M = EmitScalarExpr(E->getArg(0));18440Value *A = EmitScalarExpr(E->getArg(1));18441Value *B = EmitScalarExpr(E->getArg(2));18442if (E->getArg(0)->getType()->hasFloatingRepresentation())18443return Builder.CreateIntrinsic(18444/*ReturnType*/ M->getType(), Intrinsic::fmuladd,18445ArrayRef<Value *>{M, A, B}, nullptr, "hlsl.fmad");1844618447if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) {18448if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)18449return Builder.CreateIntrinsic(18450/*ReturnType*/ M->getType(), Intrinsic::dx_imad,18451ArrayRef<Value *>{M, A, B}, nullptr, "dx.imad");1845218453Value *Mul = Builder.CreateNSWMul(M, A);18454return Builder.CreateNSWAdd(Mul, B);18455}18456assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation());18457if 
(CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil)18458return Builder.CreateIntrinsic(18459/*ReturnType=*/M->getType(), Intrinsic::dx_umad,18460ArrayRef<Value *>{M, A, B}, nullptr, "dx.umad");1846118462Value *Mul = Builder.CreateNUWMul(M, A);18463return Builder.CreateNUWAdd(Mul, B);18464}18465case Builtin::BI__builtin_hlsl_elementwise_rcp: {18466Value *Op0 = EmitScalarExpr(E->getArg(0));18467if (!E->getArg(0)->getType()->hasFloatingRepresentation())18468llvm_unreachable("rcp operand must have a float representation");18469llvm::Type *Ty = Op0->getType();18470llvm::Type *EltTy = Ty->getScalarType();18471Constant *One = Ty->isVectorTy()18472? ConstantVector::getSplat(18473ElementCount::getFixed(18474cast<FixedVectorType>(Ty)->getNumElements()),18475ConstantFP::get(EltTy, 1.0))18476: ConstantFP::get(EltTy, 1.0);18477return Builder.CreateFDiv(One, Op0, "hlsl.rcp");18478}18479case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {18480Value *Op0 = EmitScalarExpr(E->getArg(0));18481if (!E->getArg(0)->getType()->hasFloatingRepresentation())18482llvm_unreachable("rsqrt operand must have a float representation");18483return Builder.CreateIntrinsic(18484/*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),18485ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");18486}18487case Builtin::BI__builtin_hlsl_wave_get_lane_index: {18488return EmitRuntimeCall(CGM.CreateRuntimeFunction(18489llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",18490{}, false, true));18491}18492}18493return nullptr;18494}1849518496void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,18497const CallExpr *E) {18498constexpr const char *Tag = "amdgpu-as";1849918500LLVMContext &Ctx = Inst->getContext();18501SmallVector<MMRAMetadata::TagT, 3> MMRAs;18502for (unsigned K = 2; K < E->getNumArgs(); ++K) {18503llvm::Value *V = EmitScalarExpr(E->getArg(K));18504StringRef AS;18505if (llvm::getConstantStringInfo(V, AS)) {18506MMRAs.push_back({Tag, 
AS});18507// TODO: Delete the resulting unused constant?18508continue;18509}18510CGM.Error(E->getExprLoc(),18511"expected an address space name as a string literal");18512}1851318514llvm::sort(MMRAs);18515MMRAs.erase(llvm::unique(MMRAs), MMRAs.end());18516Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs));18517}1851818519Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,18520const CallExpr *E) {18521llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;18522llvm::SyncScope::ID SSID;18523switch (BuiltinID) {18524case AMDGPU::BI__builtin_amdgcn_div_scale:18525case AMDGPU::BI__builtin_amdgcn_div_scalef: {18526// Translate from the intrinsics's struct return to the builtin's out18527// argument.1852818529Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));1853018531llvm::Value *X = EmitScalarExpr(E->getArg(0));18532llvm::Value *Y = EmitScalarExpr(E->getArg(1));18533llvm::Value *Z = EmitScalarExpr(E->getArg(2));1853418535llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,18536X->getType());1853718538llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});1853918540llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);18541llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);1854218543llvm::Type *RealFlagType = FlagOutPtr.getElementType();1854418545llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);18546Builder.CreateStore(FlagExt, FlagOutPtr);18547return Result;18548}18549case AMDGPU::BI__builtin_amdgcn_div_fmas:18550case AMDGPU::BI__builtin_amdgcn_div_fmasf: {18551llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));18552llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));18553llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));18554llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));1855518556llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,18557Src0->getType());18558llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);18559return Builder.CreateCall(F, {Src0, Src1, 
Src2, Src3ToBool});18560}1856118562case AMDGPU::BI__builtin_amdgcn_ds_swizzle:18563return emitBuiltinWithOneOverloadedType<2>(*this, E,18564Intrinsic::amdgcn_ds_swizzle);18565case AMDGPU::BI__builtin_amdgcn_mov_dpp8:18566return emitBuiltinWithOneOverloadedType<2>(*this, E,18567Intrinsic::amdgcn_mov_dpp8);18568case AMDGPU::BI__builtin_amdgcn_mov_dpp:18569case AMDGPU::BI__builtin_amdgcn_update_dpp: {18570llvm::SmallVector<llvm::Value *, 6> Args;18571// Find out if any arguments are required to be integer constant18572// expressions.18573unsigned ICEArguments = 0;18574ASTContext::GetBuiltinTypeError Error;18575getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);18576assert(Error == ASTContext::GE_None && "Should not codegen an error");18577for (unsigned I = 0; I != E->getNumArgs(); ++I) {18578Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E));18579}18580assert(Args.size() == 5 || Args.size() == 6);18581if (Args.size() == 5)18582Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType()));18583Function *F =18584CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());18585return Builder.CreateCall(F, Args);18586}18587case AMDGPU::BI__builtin_amdgcn_permlane16:18588case AMDGPU::BI__builtin_amdgcn_permlanex16:18589return emitBuiltinWithOneOverloadedType<6>(18590*this, E,18591BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane1618592? 
Intrinsic::amdgcn_permlane1618593: Intrinsic::amdgcn_permlanex16);18594case AMDGPU::BI__builtin_amdgcn_permlane64:18595return emitBuiltinWithOneOverloadedType<1>(*this, E,18596Intrinsic::amdgcn_permlane64);18597case AMDGPU::BI__builtin_amdgcn_readlane:18598return emitBuiltinWithOneOverloadedType<2>(*this, E,18599Intrinsic::amdgcn_readlane);18600case AMDGPU::BI__builtin_amdgcn_readfirstlane:18601return emitBuiltinWithOneOverloadedType<1>(*this, E,18602Intrinsic::amdgcn_readfirstlane);18603case AMDGPU::BI__builtin_amdgcn_div_fixup:18604case AMDGPU::BI__builtin_amdgcn_div_fixupf:18605case AMDGPU::BI__builtin_amdgcn_div_fixuph:18606return emitBuiltinWithOneOverloadedType<3>(*this, E,18607Intrinsic::amdgcn_div_fixup);18608case AMDGPU::BI__builtin_amdgcn_trig_preop:18609case AMDGPU::BI__builtin_amdgcn_trig_preopf:18610return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);18611case AMDGPU::BI__builtin_amdgcn_rcp:18612case AMDGPU::BI__builtin_amdgcn_rcpf:18613case AMDGPU::BI__builtin_amdgcn_rcph:18614return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp);18615case AMDGPU::BI__builtin_amdgcn_sqrt:18616case AMDGPU::BI__builtin_amdgcn_sqrtf:18617case AMDGPU::BI__builtin_amdgcn_sqrth:18618return emitBuiltinWithOneOverloadedType<1>(*this, E,18619Intrinsic::amdgcn_sqrt);18620case AMDGPU::BI__builtin_amdgcn_rsq:18621case AMDGPU::BI__builtin_amdgcn_rsqf:18622case AMDGPU::BI__builtin_amdgcn_rsqh:18623return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq);18624case AMDGPU::BI__builtin_amdgcn_rsq_clamp:18625case AMDGPU::BI__builtin_amdgcn_rsq_clampf:18626return emitBuiltinWithOneOverloadedType<1>(*this, E,18627Intrinsic::amdgcn_rsq_clamp);18628case AMDGPU::BI__builtin_amdgcn_sinf:18629case AMDGPU::BI__builtin_amdgcn_sinh:18630return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin);18631case AMDGPU::BI__builtin_amdgcn_cosf:18632case AMDGPU::BI__builtin_amdgcn_cosh:18633return 
emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos);18634case AMDGPU::BI__builtin_amdgcn_dispatch_ptr:18635return EmitAMDGPUDispatchPtr(*this, E);18636case AMDGPU::BI__builtin_amdgcn_logf:18637return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log);18638case AMDGPU::BI__builtin_amdgcn_exp2f:18639return emitBuiltinWithOneOverloadedType<1>(*this, E,18640Intrinsic::amdgcn_exp2);18641case AMDGPU::BI__builtin_amdgcn_log_clampf:18642return emitBuiltinWithOneOverloadedType<1>(*this, E,18643Intrinsic::amdgcn_log_clamp);18644case AMDGPU::BI__builtin_amdgcn_ldexp:18645case AMDGPU::BI__builtin_amdgcn_ldexpf: {18646llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));18647llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));18648llvm::Function *F =18649CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()});18650return Builder.CreateCall(F, {Src0, Src1});18651}18652case AMDGPU::BI__builtin_amdgcn_ldexph: {18653// The raw instruction has a different behavior for out of bounds exponent18654// values (implicit truncation instead of saturate to short_min/short_max).18655llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));18656llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));18657llvm::Function *F =18658CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty});18659return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)});18660}18661case AMDGPU::BI__builtin_amdgcn_frexp_mant:18662case AMDGPU::BI__builtin_amdgcn_frexp_mantf:18663case AMDGPU::BI__builtin_amdgcn_frexp_manth:18664return emitBuiltinWithOneOverloadedType<1>(*this, E,18665Intrinsic::amdgcn_frexp_mant);18666case AMDGPU::BI__builtin_amdgcn_frexp_exp:18667case AMDGPU::BI__builtin_amdgcn_frexp_expf: {18668Value *Src0 = EmitScalarExpr(E->getArg(0));18669Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,18670{ Builder.getInt32Ty(), Src0->getType() });18671return Builder.CreateCall(F, Src0);18672}18673case AMDGPU::BI__builtin_amdgcn_frexp_exph: {18674Value 
*Src0 = EmitScalarExpr(E->getArg(0));18675Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,18676{ Builder.getInt16Ty(), Src0->getType() });18677return Builder.CreateCall(F, Src0);18678}18679case AMDGPU::BI__builtin_amdgcn_fract:18680case AMDGPU::BI__builtin_amdgcn_fractf:18681case AMDGPU::BI__builtin_amdgcn_fracth:18682return emitBuiltinWithOneOverloadedType<1>(*this, E,18683Intrinsic::amdgcn_fract);18684case AMDGPU::BI__builtin_amdgcn_lerp:18685return emitBuiltinWithOneOverloadedType<3>(*this, E,18686Intrinsic::amdgcn_lerp);18687case AMDGPU::BI__builtin_amdgcn_ubfe:18688return emitBuiltinWithOneOverloadedType<3>(*this, E,18689Intrinsic::amdgcn_ubfe);18690case AMDGPU::BI__builtin_amdgcn_sbfe:18691return emitBuiltinWithOneOverloadedType<3>(*this, E,18692Intrinsic::amdgcn_sbfe);18693case AMDGPU::BI__builtin_amdgcn_ballot_w32:18694case AMDGPU::BI__builtin_amdgcn_ballot_w64: {18695llvm::Type *ResultType = ConvertType(E->getType());18696llvm::Value *Src = EmitScalarExpr(E->getArg(0));18697Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType });18698return Builder.CreateCall(F, { Src });18699}18700case AMDGPU::BI__builtin_amdgcn_uicmp:18701case AMDGPU::BI__builtin_amdgcn_uicmpl:18702case AMDGPU::BI__builtin_amdgcn_sicmp:18703case AMDGPU::BI__builtin_amdgcn_sicmpl: {18704llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));18705llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));18706llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));1870718708// FIXME-GFX10: How should 32 bit mask be handled?18709Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,18710{ Builder.getInt64Ty(), Src0->getType() });18711return Builder.CreateCall(F, { Src0, Src1, Src2 });18712}18713case AMDGPU::BI__builtin_amdgcn_fcmp:18714case AMDGPU::BI__builtin_amdgcn_fcmpf: {18715llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));18716llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));18717llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));1871818719// FIXME-GFX10: How should 32 bit mask be 
handled?18720Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,18721{ Builder.getInt64Ty(), Src0->getType() });18722return Builder.CreateCall(F, { Src0, Src1, Src2 });18723}18724case AMDGPU::BI__builtin_amdgcn_class:18725case AMDGPU::BI__builtin_amdgcn_classf:18726case AMDGPU::BI__builtin_amdgcn_classh:18727return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);18728case AMDGPU::BI__builtin_amdgcn_fmed3f:18729case AMDGPU::BI__builtin_amdgcn_fmed3h:18730return emitBuiltinWithOneOverloadedType<3>(*this, E,18731Intrinsic::amdgcn_fmed3);18732case AMDGPU::BI__builtin_amdgcn_ds_append:18733case AMDGPU::BI__builtin_amdgcn_ds_consume: {18734Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?18735Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;18736Value *Src0 = EmitScalarExpr(E->getArg(0));18737Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });18738return Builder.CreateCall(F, { Src0, Builder.getFalse() });18739}18740case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:18741case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:18742case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:18743case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:18744case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:18745case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:18746case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:18747case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:18748case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:18749case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: {18750Intrinsic::ID IID;18751llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext());18752switch (BuiltinID) {18753case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32:18754ArgTy = llvm::Type::getFloatTy(getLLVMContext());18755IID = Intrinsic::amdgcn_global_atomic_fadd;18756break;18757case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:18758ArgTy = 
llvm::FixedVectorType::get(18759llvm::Type::getHalfTy(getLLVMContext()), 2);18760IID = Intrinsic::amdgcn_global_atomic_fadd;18761break;18762case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64:18763IID = Intrinsic::amdgcn_global_atomic_fadd;18764break;18765case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64:18766IID = Intrinsic::amdgcn_global_atomic_fmin;18767break;18768case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64:18769IID = Intrinsic::amdgcn_global_atomic_fmax;18770break;18771case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:18772IID = Intrinsic::amdgcn_flat_atomic_fadd;18773break;18774case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64:18775IID = Intrinsic::amdgcn_flat_atomic_fmin;18776break;18777case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64:18778IID = Intrinsic::amdgcn_flat_atomic_fmax;18779break;18780case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:18781ArgTy = llvm::Type::getFloatTy(getLLVMContext());18782IID = Intrinsic::amdgcn_flat_atomic_fadd;18783break;18784case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:18785ArgTy = llvm::FixedVectorType::get(18786llvm::Type::getHalfTy(getLLVMContext()), 2);18787IID = Intrinsic::amdgcn_flat_atomic_fadd;18788break;18789}18790llvm::Value *Addr = EmitScalarExpr(E->getArg(0));18791llvm::Value *Val = EmitScalarExpr(E->getArg(1));18792llvm::Function *F =18793CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});18794return Builder.CreateCall(F, {Addr, Val});18795}18796case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:18797case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {18798Intrinsic::ID IID;18799switch (BuiltinID) {18800case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:18801IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;18802break;18803case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:18804IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;18805break;18806}18807llvm::Value *Addr = EmitScalarExpr(E->getArg(0));18808llvm::Value *Val = 
EmitScalarExpr(E->getArg(1));18809llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});18810return Builder.CreateCall(F, {Addr, Val});18811}18812case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:18813case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:18814case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:18815case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:18816case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:18817case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:18818case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:18819case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: {1882018821Intrinsic::ID IID;18822switch (BuiltinID) {18823case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:18824case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:18825IID = Intrinsic::amdgcn_global_load_tr_b64;18826break;18827case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:18828case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16:18829case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16:18830case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16:18831case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16:18832case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16:18833IID = Intrinsic::amdgcn_global_load_tr_b128;18834break;18835}18836llvm::Type *LoadTy = ConvertType(E->getType());18837llvm::Value *Addr = EmitScalarExpr(E->getArg(0));18838llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});18839return Builder.CreateCall(F, {Addr});18840}18841case AMDGPU::BI__builtin_amdgcn_get_fpenv: {18842Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv,18843{llvm::Type::getInt64Ty(getLLVMContext())});18844return Builder.CreateCall(F);18845}18846case AMDGPU::BI__builtin_amdgcn_set_fpenv: {18847Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv,18848{llvm::Type::getInt64Ty(getLLVMContext())});18849llvm::Value *Env = EmitScalarExpr(E->getArg(0));18850return Builder.CreateCall(F, 
{Env});18851}18852case AMDGPU::BI__builtin_amdgcn_read_exec:18853return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false);18854case AMDGPU::BI__builtin_amdgcn_read_exec_lo:18855return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false);18856case AMDGPU::BI__builtin_amdgcn_read_exec_hi:18857return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true);18858case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray:18859case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h:18860case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l:18861case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: {18862llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0));18863llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1));18864llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2));18865llvm::Value *RayDir = EmitScalarExpr(E->getArg(3));18866llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4));18867llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5));1886818869// The builtins take these arguments as vec4 where the last element is18870// ignored. 
The intrinsic takes them as vec3.18871RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin,18872ArrayRef<int>{0, 1, 2});18873RayDir =18874Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef<int>{0, 1, 2});18875RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir,18876ArrayRef<int>{0, 1, 2});1887718878Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray,18879{NodePtr->getType(), RayDir->getType()});18880return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir,18881RayInverseDir, TextureDescr});18882}1888318884case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: {18885SmallVector<Value *, 4> Args;18886for (int i = 0, e = E->getNumArgs(); i != e; ++i)18887Args.push_back(EmitScalarExpr(E->getArg(i)));1888818889Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn);18890Value *Call = Builder.CreateCall(F, Args);18891Value *Rtn = Builder.CreateExtractValue(Call, 0);18892Value *A = Builder.CreateExtractValue(Call, 1);18893llvm::Type *RetTy = ConvertType(E->getType());18894Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn,18895(uint64_t)0);18896return Builder.CreateInsertElement(I0, A, 1);18897}1889818899case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:18900case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:18901case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:18902case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:18903case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:18904case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:18905case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:18906case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:18907case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:18908case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:18909case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:18910case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:18911case 
AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:18912case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:18913case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:18914case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:18915case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:18916case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:18917case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:18918case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:18919case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:18920case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:18921case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:18922case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:18923case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:18924case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:18925case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:18926case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:18927case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:18928case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:18929case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:18930case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:18931case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:18932case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:18933case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:18934case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:18935case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:18936case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:18937case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:18938case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:18939case 
AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:18940case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:18941case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:18942case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:18943case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:18944case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:18945case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:18946case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:18947case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:18948case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:18949case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:18950case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:18951case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:18952case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:18953case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:18954case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:18955case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:18956case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:18957case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:18958case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {1895918960// These operations perform a matrix multiplication and accumulation of18961// the form:18962// D = A * B + C18963// We need to specify one type for matrices AB and one for matrices CD.18964// Sparse matrix operations can have different types for A and B as well as18965// an additional type for sparsity index.18966// Destination type should be put before types used for source operands.18967SmallVector<unsigned, 2> ArgsForMatchingMatrixTypes;18968// On GFX12, the intrinsics with 16-bit accumulator use a packed layout.18969// There is no need for the variable opsel argument, so always set it to18970// "false".18971bool AppendFalseForOpselArg = 
false;18972unsigned BuiltinWMMAOp;1897318974switch (BuiltinID) {18975case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:18976case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64:18977case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12:18978case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12:18979ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB18980BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16;18981break;18982case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32:18983case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64:18984case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12:18985case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12:18986ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB18987BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16;18988break;18989case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12:18990case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12:18991AppendFalseForOpselArg = true;18992[[fallthrough]];18993case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32:18994case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64:18995ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB18996BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16;18997break;18998case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12:18999case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12:19000AppendFalseForOpselArg = true;19001[[fallthrough]];19002case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32:19003case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64:19004ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19005BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16;19006break;19007case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32:19008case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64:19009ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19010BuiltinWMMAOp = 
Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied;19011break;19012case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32:19013case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64:19014ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19015BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied;19016break;19017case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32:19018case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64:19019case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12:19020case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12:19021ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB19022BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8;19023break;19024case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32:19025case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64:19026case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12:19027case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12:19028ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB19029BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4;19030break;19031case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12:19032case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12:19033ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19034BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8;19035break;19036case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12:19037case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12:19038ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19039BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8;19040break;19041case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12:19042case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12:19043ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19044BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8;19045break;19046case 
AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12:19047case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12:19048ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB19049BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8;19050break;19051case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12:19052case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12:19053ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB19054BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4;19055break;19056case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32:19057case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64:19058ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19059BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16;19060break;19061case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32:19062case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64:19063ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19064BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16;19065break;19066case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32:19067case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64:19068ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19069BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16;19070break;19071case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32:19072case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64:19073ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19074BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16;19075break;19076case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32:19077case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64:19078ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index19079BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8;19080break;19081case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32:19082case 
AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64:19083ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index19084BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4;19085break;19086case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32:19087case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64:19088ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index19089BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4;19090break;19091case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32:19092case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64:19093ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19094BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8;19095break;19096case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32:19097case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64:19098ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19099BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8;19100break;19101case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:19102case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:19103ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19104BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8;19105break;19106case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:19107case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:19108ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index19109BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;19110break;19111}1911219113SmallVector<Value *, 6> Args;19114for (int i = 0, e = E->getNumArgs(); i != e; ++i)19115Args.push_back(EmitScalarExpr(E->getArg(i)));19116if (AppendFalseForOpselArg)19117Args.push_back(Builder.getFalse());1911819119SmallVector<llvm::Type *, 6> ArgTypes;19120for (auto ArgIdx : ArgsForMatchingMatrixTypes)19121ArgTypes.push_back(Args[ArgIdx]->getType());1912219123Function *F = 
CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes);19124return Builder.CreateCall(F, Args);19125}1912619127// amdgcn workitem19128case AMDGPU::BI__builtin_amdgcn_workitem_id_x:19129return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);19130case AMDGPU::BI__builtin_amdgcn_workitem_id_y:19131return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);19132case AMDGPU::BI__builtin_amdgcn_workitem_id_z:19133return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);1913419135// amdgcn workgroup size19136case AMDGPU::BI__builtin_amdgcn_workgroup_size_x:19137return EmitAMDGPUWorkGroupSize(*this, 0);19138case AMDGPU::BI__builtin_amdgcn_workgroup_size_y:19139return EmitAMDGPUWorkGroupSize(*this, 1);19140case AMDGPU::BI__builtin_amdgcn_workgroup_size_z:19141return EmitAMDGPUWorkGroupSize(*this, 2);1914219143// amdgcn grid size19144case AMDGPU::BI__builtin_amdgcn_grid_size_x:19145return EmitAMDGPUGridSize(*this, 0);19146case AMDGPU::BI__builtin_amdgcn_grid_size_y:19147return EmitAMDGPUGridSize(*this, 1);19148case AMDGPU::BI__builtin_amdgcn_grid_size_z:19149return EmitAMDGPUGridSize(*this, 2);1915019151// r600 intrinsics19152case AMDGPU::BI__builtin_r600_recipsqrt_ieee:19153case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:19154return emitBuiltinWithOneOverloadedType<1>(*this, E,19155Intrinsic::r600_recipsqrt_ieee);19156case AMDGPU::BI__builtin_r600_read_tidig_x:19157return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);19158case AMDGPU::BI__builtin_r600_read_tidig_y:19159return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);19160case AMDGPU::BI__builtin_r600_read_tidig_z:19161return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);19162case AMDGPU::BI__builtin_amdgcn_alignbit: {19163llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));19164llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));19165llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));19166Function *F = CGM.getIntrinsic(Intrinsic::fshr, 
Src0->getType());19167return Builder.CreateCall(F, { Src0, Src1, Src2 });19168}19169case AMDGPU::BI__builtin_amdgcn_fence: {19170ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)),19171EmitScalarExpr(E->getArg(1)), AO, SSID);19172FenceInst *Fence = Builder.CreateFence(AO, SSID);19173if (E->getNumArgs() > 2)19174AddAMDGPUFenceAddressSpaceMMRA(Fence, E);19175return Fence;19176}19177case AMDGPU::BI__builtin_amdgcn_atomic_inc32:19178case AMDGPU::BI__builtin_amdgcn_atomic_inc64:19179case AMDGPU::BI__builtin_amdgcn_atomic_dec32:19180case AMDGPU::BI__builtin_amdgcn_atomic_dec64:19181case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:19182case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:19183case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:19184case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:19185case AMDGPU::BI__builtin_amdgcn_ds_faddf:19186case AMDGPU::BI__builtin_amdgcn_ds_fminf:19187case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {19188llvm::AtomicRMWInst::BinOp BinOp;19189switch (BuiltinID) {19190case AMDGPU::BI__builtin_amdgcn_atomic_inc32:19191case AMDGPU::BI__builtin_amdgcn_atomic_inc64:19192BinOp = llvm::AtomicRMWInst::UIncWrap;19193break;19194case AMDGPU::BI__builtin_amdgcn_atomic_dec32:19195case AMDGPU::BI__builtin_amdgcn_atomic_dec64:19196BinOp = llvm::AtomicRMWInst::UDecWrap;19197break;19198case AMDGPU::BI__builtin_amdgcn_ds_faddf:19199case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:19200case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:19201case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:19202case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:19203BinOp = llvm::AtomicRMWInst::FAdd;19204break;19205case AMDGPU::BI__builtin_amdgcn_ds_fminf:19206BinOp = llvm::AtomicRMWInst::FMin;19207break;19208case AMDGPU::BI__builtin_amdgcn_ds_fmaxf:19209BinOp = llvm::AtomicRMWInst::FMax;19210break;19211}1921219213Address Ptr = CheckAtomicAlignment(*this, E);19214Value *Val = EmitScalarExpr(E->getArg(1));19215llvm::Type *OrigTy = Val->getType();19216QualType 
PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();1921719218bool Volatile;1921919220if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf ||19221BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf ||19222BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) {19223// __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument19224Volatile =19225cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();19226} else {19227// Infer volatile from the passed type.19228Volatile =19229PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();19230}1923119232if (E->getNumArgs() >= 4) {19233// Some of the builtins have explicit ordering and scope arguments.19234ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),19235EmitScalarExpr(E->getArg(3)), AO, SSID);19236} else {19237// The ds_atomic_fadd_* builtins do not have syncscope/order arguments.19238SSID = llvm::SyncScope::System;19239AO = AtomicOrdering::SequentiallyConsistent;1924019241// The v2bf16 builtin uses i16 instead of a natural bfloat type.19242if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {19243llvm::Type *V2BF16Ty = FixedVectorType::get(19244llvm::Type::getBFloatTy(Builder.getContext()), 2);19245Val = Builder.CreateBitCast(Val, V2BF16Ty);19246}19247}1924819249llvm::AtomicRMWInst *RMW =19250Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);19251if (Volatile)19252RMW->setVolatile(true);19253return Builder.CreateBitCast(RMW, OrigTy);19254}19255case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:19256case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {19257llvm::Value *Arg = EmitScalarExpr(E->getArg(0));19258llvm::Type *ResultType = ConvertType(E->getType());19259// s_sendmsg_rtn is mangled using return type only.19260Function *F =19261CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType});19262return Builder.CreateCall(F, {Arg});19263}19264case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc:19265return emitBuiltinWithOneOverloadedType<4>(19266*this, E, 
Intrinsic::amdgcn_make_buffer_rsrc);19267case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8:19268case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16:19269case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32:19270case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64:19271case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96:19272case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:19273return emitBuiltinWithOneOverloadedType<5>(19274*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);19275case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:19276case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:19277case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:19278case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:19279case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:19280case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {19281llvm::Type *RetTy = nullptr;19282switch (BuiltinID) {19283case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:19284RetTy = Int8Ty;19285break;19286case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:19287RetTy = Int16Ty;19288break;19289case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:19290RetTy = Int32Ty;19291break;19292case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:19293RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);19294break;19295case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:19296RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);19297break;19298case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:19299RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);19300break;19301}19302Function *F =19303CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);19304return Builder.CreateCall(19305F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),19306EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});19307}19308default:19309return nullptr;19310}19311}1931219313/// Handle a SystemZ function in which the final argument is a pointer19314/// to an int that receives the 
post-instruction CC value. At the LLVM level19315/// this is represented as a function that returns a {result, cc} pair.19316static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,19317unsigned IntrinsicID,19318const CallExpr *E) {19319unsigned NumArgs = E->getNumArgs() - 1;19320SmallVector<Value *, 8> Args(NumArgs);19321for (unsigned I = 0; I < NumArgs; ++I)19322Args[I] = CGF.EmitScalarExpr(E->getArg(I));19323Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));19324Function *F = CGF.CGM.getIntrinsic(IntrinsicID);19325Value *Call = CGF.Builder.CreateCall(F, Args);19326Value *CC = CGF.Builder.CreateExtractValue(Call, 1);19327CGF.Builder.CreateStore(CC, CCPtr);19328return CGF.Builder.CreateExtractValue(Call, 0);19329}1933019331Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,19332const CallExpr *E) {19333switch (BuiltinID) {19334case SystemZ::BI__builtin_tbegin: {19335Value *TDB = EmitScalarExpr(E->getArg(0));19336Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);19337Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);19338return Builder.CreateCall(F, {TDB, Control});19339}19340case SystemZ::BI__builtin_tbegin_nofloat: {19341Value *TDB = EmitScalarExpr(E->getArg(0));19342Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);19343Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);19344return Builder.CreateCall(F, {TDB, Control});19345}19346case SystemZ::BI__builtin_tbeginc: {19347Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);19348Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);19349Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);19350return Builder.CreateCall(F, {TDB, Control});19351}19352case SystemZ::BI__builtin_tabort: {19353Value *Data = EmitScalarExpr(E->getArg(0));19354Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);19355return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));19356}19357case SystemZ::BI__builtin_non_tx_store: {19358Value *Address = 
EmitScalarExpr(E->getArg(0));19359Value *Data = EmitScalarExpr(E->getArg(1));19360Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);19361return Builder.CreateCall(F, {Data, Address});19362}1936319364// Vector builtins. Note that most vector builtins are mapped automatically19365// to target-specific LLVM intrinsics. The ones handled specially here can19366// be represented via standard LLVM IR, which is preferable to enable common19367// LLVM optimizations.1936819369case SystemZ::BI__builtin_s390_vpopctb:19370case SystemZ::BI__builtin_s390_vpopcth:19371case SystemZ::BI__builtin_s390_vpopctf:19372case SystemZ::BI__builtin_s390_vpopctg: {19373llvm::Type *ResultType = ConvertType(E->getType());19374Value *X = EmitScalarExpr(E->getArg(0));19375Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);19376return Builder.CreateCall(F, X);19377}1937819379case SystemZ::BI__builtin_s390_vclzb:19380case SystemZ::BI__builtin_s390_vclzh:19381case SystemZ::BI__builtin_s390_vclzf:19382case SystemZ::BI__builtin_s390_vclzg: {19383llvm::Type *ResultType = ConvertType(E->getType());19384Value *X = EmitScalarExpr(E->getArg(0));19385Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);19386Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);19387return Builder.CreateCall(F, {X, Undef});19388}1938919390case SystemZ::BI__builtin_s390_vctzb:19391case SystemZ::BI__builtin_s390_vctzh:19392case SystemZ::BI__builtin_s390_vctzf:19393case SystemZ::BI__builtin_s390_vctzg: {19394llvm::Type *ResultType = ConvertType(E->getType());19395Value *X = EmitScalarExpr(E->getArg(0));19396Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);19397Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);19398return Builder.CreateCall(F, {X, Undef});19399}1940019401case SystemZ::BI__builtin_s390_verllb:19402case SystemZ::BI__builtin_s390_verllh:19403case SystemZ::BI__builtin_s390_verllf:19404case SystemZ::BI__builtin_s390_verllg: {19405llvm::Type *ResultType = 
ConvertType(E->getType());19406llvm::Value *Src = EmitScalarExpr(E->getArg(0));19407llvm::Value *Amt = EmitScalarExpr(E->getArg(1));19408// Splat scalar rotate amount to vector type.19409unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();19410Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);19411Amt = Builder.CreateVectorSplat(NumElts, Amt);19412Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);19413return Builder.CreateCall(F, { Src, Src, Amt });19414}1941519416case SystemZ::BI__builtin_s390_verllvb:19417case SystemZ::BI__builtin_s390_verllvh:19418case SystemZ::BI__builtin_s390_verllvf:19419case SystemZ::BI__builtin_s390_verllvg: {19420llvm::Type *ResultType = ConvertType(E->getType());19421llvm::Value *Src = EmitScalarExpr(E->getArg(0));19422llvm::Value *Amt = EmitScalarExpr(E->getArg(1));19423Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);19424return Builder.CreateCall(F, { Src, Src, Amt });19425}1942619427case SystemZ::BI__builtin_s390_vfsqsb:19428case SystemZ::BI__builtin_s390_vfsqdb: {19429llvm::Type *ResultType = ConvertType(E->getType());19430Value *X = EmitScalarExpr(E->getArg(0));19431if (Builder.getIsFPConstrained()) {19432Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType);19433return Builder.CreateConstrainedFPCall(F, { X });19434} else {19435Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);19436return Builder.CreateCall(F, X);19437}19438}19439case SystemZ::BI__builtin_s390_vfmasb:19440case SystemZ::BI__builtin_s390_vfmadb: {19441llvm::Type *ResultType = ConvertType(E->getType());19442Value *X = EmitScalarExpr(E->getArg(0));19443Value *Y = EmitScalarExpr(E->getArg(1));19444Value *Z = EmitScalarExpr(E->getArg(2));19445if (Builder.getIsFPConstrained()) {19446Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);19447return Builder.CreateConstrainedFPCall(F, {X, Y, Z});19448} else {19449Function *F = 
CGM.getIntrinsic(Intrinsic::fma, ResultType);19450return Builder.CreateCall(F, {X, Y, Z});19451}19452}19453case SystemZ::BI__builtin_s390_vfmssb:19454case SystemZ::BI__builtin_s390_vfmsdb: {19455llvm::Type *ResultType = ConvertType(E->getType());19456Value *X = EmitScalarExpr(E->getArg(0));19457Value *Y = EmitScalarExpr(E->getArg(1));19458Value *Z = EmitScalarExpr(E->getArg(2));19459if (Builder.getIsFPConstrained()) {19460Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);19461return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});19462} else {19463Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);19464return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});19465}19466}19467case SystemZ::BI__builtin_s390_vfnmasb:19468case SystemZ::BI__builtin_s390_vfnmadb: {19469llvm::Type *ResultType = ConvertType(E->getType());19470Value *X = EmitScalarExpr(E->getArg(0));19471Value *Y = EmitScalarExpr(E->getArg(1));19472Value *Z = EmitScalarExpr(E->getArg(2));19473if (Builder.getIsFPConstrained()) {19474Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);19475return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg");19476} else {19477Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);19478return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");19479}19480}19481case SystemZ::BI__builtin_s390_vfnmssb:19482case SystemZ::BI__builtin_s390_vfnmsdb: {19483llvm::Type *ResultType = ConvertType(E->getType());19484Value *X = EmitScalarExpr(E->getArg(0));19485Value *Y = EmitScalarExpr(E->getArg(1));19486Value *Z = EmitScalarExpr(E->getArg(2));19487if (Builder.getIsFPConstrained()) {19488Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType);19489Value *NegZ = Builder.CreateFNeg(Z, "sub");19490return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ}));19491} else {19492Function *F = 
CGM.getIntrinsic(Intrinsic::fma, ResultType);19493Value *NegZ = Builder.CreateFNeg(Z, "neg");19494return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ}));19495}19496}19497case SystemZ::BI__builtin_s390_vflpsb:19498case SystemZ::BI__builtin_s390_vflpdb: {19499llvm::Type *ResultType = ConvertType(E->getType());19500Value *X = EmitScalarExpr(E->getArg(0));19501Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);19502return Builder.CreateCall(F, X);19503}19504case SystemZ::BI__builtin_s390_vflnsb:19505case SystemZ::BI__builtin_s390_vflndb: {19506llvm::Type *ResultType = ConvertType(E->getType());19507Value *X = EmitScalarExpr(E->getArg(0));19508Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);19509return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg");19510}19511case SystemZ::BI__builtin_s390_vfisb:19512case SystemZ::BI__builtin_s390_vfidb: {19513llvm::Type *ResultType = ConvertType(E->getType());19514Value *X = EmitScalarExpr(E->getArg(0));19515// Constant-fold the M4 and M5 mask arguments.19516llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext());19517llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext());19518// Check whether this instance can be represented via a LLVM standard19519// intrinsic. 
We only support some combinations of M4 and M5.19520Intrinsic::ID ID = Intrinsic::not_intrinsic;19521Intrinsic::ID CI;19522switch (M4.getZExtValue()) {19523default: break;19524case 0: // IEEE-inexact exception allowed19525switch (M5.getZExtValue()) {19526default: break;19527case 0: ID = Intrinsic::rint;19528CI = Intrinsic::experimental_constrained_rint; break;19529}19530break;19531case 4: // IEEE-inexact exception suppressed19532switch (M5.getZExtValue()) {19533default: break;19534case 0: ID = Intrinsic::nearbyint;19535CI = Intrinsic::experimental_constrained_nearbyint; break;19536case 1: ID = Intrinsic::round;19537CI = Intrinsic::experimental_constrained_round; break;19538case 5: ID = Intrinsic::trunc;19539CI = Intrinsic::experimental_constrained_trunc; break;19540case 6: ID = Intrinsic::ceil;19541CI = Intrinsic::experimental_constrained_ceil; break;19542case 7: ID = Intrinsic::floor;19543CI = Intrinsic::experimental_constrained_floor; break;19544}19545break;19546}19547if (ID != Intrinsic::not_intrinsic) {19548if (Builder.getIsFPConstrained()) {19549Function *F = CGM.getIntrinsic(CI, ResultType);19550return Builder.CreateConstrainedFPCall(F, X);19551} else {19552Function *F = CGM.getIntrinsic(ID, ResultType);19553return Builder.CreateCall(F, X);19554}19555}19556switch (BuiltinID) { // FIXME: constrained version?19557case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;19558case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;19559default: llvm_unreachable("Unknown BuiltinID");19560}19561Function *F = CGM.getIntrinsic(ID);19562Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);19563Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);19564return Builder.CreateCall(F, {X, M4Value, M5Value});19565}19566case SystemZ::BI__builtin_s390_vfmaxsb:19567case SystemZ::BI__builtin_s390_vfmaxdb: {19568llvm::Type *ResultType = ConvertType(E->getType());19569Value *X = EmitScalarExpr(E->getArg(0));19570Value *Y = 
EmitScalarExpr(E->getArg(1));19571// Constant-fold the M4 mask argument.19572llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());19573// Check whether this instance can be represented via a LLVM standard19574// intrinsic. We only support some values of M4.19575Intrinsic::ID ID = Intrinsic::not_intrinsic;19576Intrinsic::ID CI;19577switch (M4.getZExtValue()) {19578default: break;19579case 4: ID = Intrinsic::maxnum;19580CI = Intrinsic::experimental_constrained_maxnum; break;19581}19582if (ID != Intrinsic::not_intrinsic) {19583if (Builder.getIsFPConstrained()) {19584Function *F = CGM.getIntrinsic(CI, ResultType);19585return Builder.CreateConstrainedFPCall(F, {X, Y});19586} else {19587Function *F = CGM.getIntrinsic(ID, ResultType);19588return Builder.CreateCall(F, {X, Y});19589}19590}19591switch (BuiltinID) {19592case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;19593case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;19594default: llvm_unreachable("Unknown BuiltinID");19595}19596Function *F = CGM.getIntrinsic(ID);19597Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);19598return Builder.CreateCall(F, {X, Y, M4Value});19599}19600case SystemZ::BI__builtin_s390_vfminsb:19601case SystemZ::BI__builtin_s390_vfmindb: {19602llvm::Type *ResultType = ConvertType(E->getType());19603Value *X = EmitScalarExpr(E->getArg(0));19604Value *Y = EmitScalarExpr(E->getArg(1));19605// Constant-fold the M4 mask argument.19606llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext());19607// Check whether this instance can be represented via a LLVM standard19608// intrinsic. 
We only support some values of M4.19609Intrinsic::ID ID = Intrinsic::not_intrinsic;19610Intrinsic::ID CI;19611switch (M4.getZExtValue()) {19612default: break;19613case 4: ID = Intrinsic::minnum;19614CI = Intrinsic::experimental_constrained_minnum; break;19615}19616if (ID != Intrinsic::not_intrinsic) {19617if (Builder.getIsFPConstrained()) {19618Function *F = CGM.getIntrinsic(CI, ResultType);19619return Builder.CreateConstrainedFPCall(F, {X, Y});19620} else {19621Function *F = CGM.getIntrinsic(ID, ResultType);19622return Builder.CreateCall(F, {X, Y});19623}19624}19625switch (BuiltinID) {19626case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;19627case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;19628default: llvm_unreachable("Unknown BuiltinID");19629}19630Function *F = CGM.getIntrinsic(ID);19631Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);19632return Builder.CreateCall(F, {X, Y, M4Value});19633}1963419635case SystemZ::BI__builtin_s390_vlbrh:19636case SystemZ::BI__builtin_s390_vlbrf:19637case SystemZ::BI__builtin_s390_vlbrg: {19638llvm::Type *ResultType = ConvertType(E->getType());19639Value *X = EmitScalarExpr(E->getArg(0));19640Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);19641return Builder.CreateCall(F, X);19642}1964319644// Vector intrinsics that output the post-instruction CC value.1964519646#define INTRINSIC_WITH_CC(NAME) \19647case SystemZ::BI__builtin_##NAME: \19648return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, 
E)1964919650INTRINSIC_WITH_CC(s390_vpkshs);19651INTRINSIC_WITH_CC(s390_vpksfs);19652INTRINSIC_WITH_CC(s390_vpksgs);1965319654INTRINSIC_WITH_CC(s390_vpklshs);19655INTRINSIC_WITH_CC(s390_vpklsfs);19656INTRINSIC_WITH_CC(s390_vpklsgs);1965719658INTRINSIC_WITH_CC(s390_vceqbs);19659INTRINSIC_WITH_CC(s390_vceqhs);19660INTRINSIC_WITH_CC(s390_vceqfs);19661INTRINSIC_WITH_CC(s390_vceqgs);1966219663INTRINSIC_WITH_CC(s390_vchbs);19664INTRINSIC_WITH_CC(s390_vchhs);19665INTRINSIC_WITH_CC(s390_vchfs);19666INTRINSIC_WITH_CC(s390_vchgs);1966719668INTRINSIC_WITH_CC(s390_vchlbs);19669INTRINSIC_WITH_CC(s390_vchlhs);19670INTRINSIC_WITH_CC(s390_vchlfs);19671INTRINSIC_WITH_CC(s390_vchlgs);1967219673INTRINSIC_WITH_CC(s390_vfaebs);19674INTRINSIC_WITH_CC(s390_vfaehs);19675INTRINSIC_WITH_CC(s390_vfaefs);1967619677INTRINSIC_WITH_CC(s390_vfaezbs);19678INTRINSIC_WITH_CC(s390_vfaezhs);19679INTRINSIC_WITH_CC(s390_vfaezfs);1968019681INTRINSIC_WITH_CC(s390_vfeebs);19682INTRINSIC_WITH_CC(s390_vfeehs);19683INTRINSIC_WITH_CC(s390_vfeefs);1968419685INTRINSIC_WITH_CC(s390_vfeezbs);19686INTRINSIC_WITH_CC(s390_vfeezhs);19687INTRINSIC_WITH_CC(s390_vfeezfs);1968819689INTRINSIC_WITH_CC(s390_vfenebs);19690INTRINSIC_WITH_CC(s390_vfenehs);19691INTRINSIC_WITH_CC(s390_vfenefs);1969219693INTRINSIC_WITH_CC(s390_vfenezbs);19694INTRINSIC_WITH_CC(s390_vfenezhs);19695INTRINSIC_WITH_CC(s390_vfenezfs);1969619697INTRINSIC_WITH_CC(s390_vistrbs);19698INTRINSIC_WITH_CC(s390_vistrhs);19699INTRINSIC_WITH_CC(s390_vistrfs);1970019701INTRINSIC_WITH_CC(s390_vstrcbs);19702INTRINSIC_WITH_CC(s390_vstrchs);19703INTRINSIC_WITH_CC(s390_vstrcfs);1970419705INTRINSIC_WITH_CC(s390_vstrczbs);19706INTRINSIC_WITH_CC(s390_vstrczhs);19707INTRINSIC_WITH_CC(s390_vstrczfs);1970819709INTRINSIC_WITH_CC(s390_vfcesbs);19710INTRINSIC_WITH_CC(s390_vfcedbs);19711INTRINSIC_WITH_CC(s390_vfchsbs);19712INTRINSIC_WITH_CC(s390_vfchdbs);19713INTRINSIC_WITH_CC(s390_vfchesbs);19714INTRINSIC_WITH_CC(s390_vfchedbs);1971519716INTRINSIC_WITH_CC(s390_vftcisb);19717INTRIN
SIC_WITH_CC(s390_vftcidb);1971819719INTRINSIC_WITH_CC(s390_vstrsb);19720INTRINSIC_WITH_CC(s390_vstrsh);19721INTRINSIC_WITH_CC(s390_vstrsf);1972219723INTRINSIC_WITH_CC(s390_vstrszb);19724INTRINSIC_WITH_CC(s390_vstrszh);19725INTRINSIC_WITH_CC(s390_vstrszf);1972619727#undef INTRINSIC_WITH_CC1972819729default:19730return nullptr;19731}19732}1973319734namespace {19735// Helper classes for mapping MMA builtins to particular LLVM intrinsic variant.19736struct NVPTXMmaLdstInfo {19737unsigned NumResults; // Number of elements to load/store19738// Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.19739unsigned IID_col;19740unsigned IID_row;19741};1974219743#define MMA_INTR(geom_op_type, layout) \19744Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride19745#define MMA_LDST(n, geom_op_type) \19746{ n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }1974719748static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {19749switch (BuiltinID) {19750// FP MMA loads19751case NVPTX::BI__hmma_m16n16k16_ld_a:19752return MMA_LDST(8, m16n16k16_load_a_f16);19753case NVPTX::BI__hmma_m16n16k16_ld_b:19754return MMA_LDST(8, m16n16k16_load_b_f16);19755case NVPTX::BI__hmma_m16n16k16_ld_c_f16:19756return MMA_LDST(4, m16n16k16_load_c_f16);19757case NVPTX::BI__hmma_m16n16k16_ld_c_f32:19758return MMA_LDST(8, m16n16k16_load_c_f32);19759case NVPTX::BI__hmma_m32n8k16_ld_a:19760return MMA_LDST(8, m32n8k16_load_a_f16);19761case NVPTX::BI__hmma_m32n8k16_ld_b:19762return MMA_LDST(8, m32n8k16_load_b_f16);19763case NVPTX::BI__hmma_m32n8k16_ld_c_f16:19764return MMA_LDST(4, m32n8k16_load_c_f16);19765case NVPTX::BI__hmma_m32n8k16_ld_c_f32:19766return MMA_LDST(8, m32n8k16_load_c_f32);19767case NVPTX::BI__hmma_m8n32k16_ld_a:19768return MMA_LDST(8, m8n32k16_load_a_f16);19769case NVPTX::BI__hmma_m8n32k16_ld_b:19770return MMA_LDST(8, m8n32k16_load_b_f16);19771case NVPTX::BI__hmma_m8n32k16_ld_c_f16:19772return MMA_LDST(4, m8n32k16_load_c_f16);19773case 
NVPTX::BI__hmma_m8n32k16_ld_c_f32:19774return MMA_LDST(8, m8n32k16_load_c_f32);1977519776// Integer MMA loads19777case NVPTX::BI__imma_m16n16k16_ld_a_s8:19778return MMA_LDST(2, m16n16k16_load_a_s8);19779case NVPTX::BI__imma_m16n16k16_ld_a_u8:19780return MMA_LDST(2, m16n16k16_load_a_u8);19781case NVPTX::BI__imma_m16n16k16_ld_b_s8:19782return MMA_LDST(2, m16n16k16_load_b_s8);19783case NVPTX::BI__imma_m16n16k16_ld_b_u8:19784return MMA_LDST(2, m16n16k16_load_b_u8);19785case NVPTX::BI__imma_m16n16k16_ld_c:19786return MMA_LDST(8, m16n16k16_load_c_s32);19787case NVPTX::BI__imma_m32n8k16_ld_a_s8:19788return MMA_LDST(4, m32n8k16_load_a_s8);19789case NVPTX::BI__imma_m32n8k16_ld_a_u8:19790return MMA_LDST(4, m32n8k16_load_a_u8);19791case NVPTX::BI__imma_m32n8k16_ld_b_s8:19792return MMA_LDST(1, m32n8k16_load_b_s8);19793case NVPTX::BI__imma_m32n8k16_ld_b_u8:19794return MMA_LDST(1, m32n8k16_load_b_u8);19795case NVPTX::BI__imma_m32n8k16_ld_c:19796return MMA_LDST(8, m32n8k16_load_c_s32);19797case NVPTX::BI__imma_m8n32k16_ld_a_s8:19798return MMA_LDST(1, m8n32k16_load_a_s8);19799case NVPTX::BI__imma_m8n32k16_ld_a_u8:19800return MMA_LDST(1, m8n32k16_load_a_u8);19801case NVPTX::BI__imma_m8n32k16_ld_b_s8:19802return MMA_LDST(4, m8n32k16_load_b_s8);19803case NVPTX::BI__imma_m8n32k16_ld_b_u8:19804return MMA_LDST(4, m8n32k16_load_b_u8);19805case NVPTX::BI__imma_m8n32k16_ld_c:19806return MMA_LDST(8, m8n32k16_load_c_s32);1980719808// Sub-integer MMA loads.19809// Only row/col layout is supported by A/B fragments.19810case NVPTX::BI__imma_m8n8k32_ld_a_s4:19811return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};19812case NVPTX::BI__imma_m8n8k32_ld_a_u4:19813return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};19814case NVPTX::BI__imma_m8n8k32_ld_b_s4:19815return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};19816case NVPTX::BI__imma_m8n8k32_ld_b_u4:19817return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};19818case NVPTX::BI__imma_m8n8k32_ld_c:19819return MMA_LDST(2, m8n8k32_load_c_s32);19820case 
NVPTX::BI__bmma_m8n8k128_ld_a_b1:19821return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};19822case NVPTX::BI__bmma_m8n8k128_ld_b_b1:19823return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};19824case NVPTX::BI__bmma_m8n8k128_ld_c:19825return MMA_LDST(2, m8n8k128_load_c_s32);1982619827// Double MMA loads19828case NVPTX::BI__dmma_m8n8k4_ld_a:19829return MMA_LDST(1, m8n8k4_load_a_f64);19830case NVPTX::BI__dmma_m8n8k4_ld_b:19831return MMA_LDST(1, m8n8k4_load_b_f64);19832case NVPTX::BI__dmma_m8n8k4_ld_c:19833return MMA_LDST(2, m8n8k4_load_c_f64);1983419835// Alternate float MMA loads19836case NVPTX::BI__mma_bf16_m16n16k16_ld_a:19837return MMA_LDST(4, m16n16k16_load_a_bf16);19838case NVPTX::BI__mma_bf16_m16n16k16_ld_b:19839return MMA_LDST(4, m16n16k16_load_b_bf16);19840case NVPTX::BI__mma_bf16_m8n32k16_ld_a:19841return MMA_LDST(2, m8n32k16_load_a_bf16);19842case NVPTX::BI__mma_bf16_m8n32k16_ld_b:19843return MMA_LDST(8, m8n32k16_load_b_bf16);19844case NVPTX::BI__mma_bf16_m32n8k16_ld_a:19845return MMA_LDST(8, m32n8k16_load_a_bf16);19846case NVPTX::BI__mma_bf16_m32n8k16_ld_b:19847return MMA_LDST(2, m32n8k16_load_b_bf16);19848case NVPTX::BI__mma_tf32_m16n16k8_ld_a:19849return MMA_LDST(4, m16n16k8_load_a_tf32);19850case NVPTX::BI__mma_tf32_m16n16k8_ld_b:19851return MMA_LDST(4, m16n16k8_load_b_tf32);19852case NVPTX::BI__mma_tf32_m16n16k8_ld_c:19853return MMA_LDST(8, m16n16k8_load_c_f32);1985419855// NOTE: We need to follow inconsitent naming scheme used by NVCC. 
Unlike19856// PTX and LLVM IR where stores always use fragment D, NVCC builtins always19857// use fragment C for both loads and stores.19858// FP MMA stores.19859case NVPTX::BI__hmma_m16n16k16_st_c_f16:19860return MMA_LDST(4, m16n16k16_store_d_f16);19861case NVPTX::BI__hmma_m16n16k16_st_c_f32:19862return MMA_LDST(8, m16n16k16_store_d_f32);19863case NVPTX::BI__hmma_m32n8k16_st_c_f16:19864return MMA_LDST(4, m32n8k16_store_d_f16);19865case NVPTX::BI__hmma_m32n8k16_st_c_f32:19866return MMA_LDST(8, m32n8k16_store_d_f32);19867case NVPTX::BI__hmma_m8n32k16_st_c_f16:19868return MMA_LDST(4, m8n32k16_store_d_f16);19869case NVPTX::BI__hmma_m8n32k16_st_c_f32:19870return MMA_LDST(8, m8n32k16_store_d_f32);1987119872// Integer and sub-integer MMA stores.19873// Another naming quirk. Unlike other MMA builtins that use PTX types in the19874// name, integer loads/stores use LLVM's i32.19875case NVPTX::BI__imma_m16n16k16_st_c_i32:19876return MMA_LDST(8, m16n16k16_store_d_s32);19877case NVPTX::BI__imma_m32n8k16_st_c_i32:19878return MMA_LDST(8, m32n8k16_store_d_s32);19879case NVPTX::BI__imma_m8n32k16_st_c_i32:19880return MMA_LDST(8, m8n32k16_store_d_s32);19881case NVPTX::BI__imma_m8n8k32_st_c_i32:19882return MMA_LDST(2, m8n8k32_store_d_s32);19883case NVPTX::BI__bmma_m8n8k128_st_c_i32:19884return MMA_LDST(2, m8n8k128_store_d_s32);1988519886// Double MMA store19887case NVPTX::BI__dmma_m8n8k4_st_c_f64:19888return MMA_LDST(2, m8n8k4_store_d_f64);1988919890// Alternate float MMA store19891case NVPTX::BI__mma_m16n16k8_st_c_f32:19892return MMA_LDST(8, m16n16k8_store_d_f32);1989319894default:19895llvm_unreachable("Unknown MMA builtin");19896}19897}19898#undef MMA_LDST19899#undef MMA_INTR199001990119902struct NVPTXMmaInfo {19903unsigned NumEltsA;19904unsigned NumEltsB;19905unsigned NumEltsC;19906unsigned NumEltsD;1990719908// Variants are ordered by layout-A/layout-B/satf, where 'row' has priority19909// over 'col' for layout. 
// Describes one MMA builtin: the element count of each fragment and the table
// of intrinsic IDs for every layout/satf combination.
struct NVPTXMmaInfo {
  unsigned NumEltsA;
  unsigned NumEltsB;
  unsigned NumEltsC;
  unsigned NumEltsD;

  // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority
  // over 'col' for layout. The index of non-satf variants is expected to match
  // the undocumented layout constants used by CUDA's mma.hpp.
  std::array<unsigned, 8> Variants;

  // Returns the intrinsic ID stored for Layout and Satf, or 0 when Layout is
  // not one of the four layout indices (0..3). A stored value of 0 likewise
  // means the combination has no intrinsic.
  //
  // The explicit range check matters: folding Layout and Satf into a single
  // index first would let an out-of-range Layout land on a valid slot of the
  // other half of the table (e.g. Layout 4 without satf, or -1 with satf).
  unsigned getMMAIntrinsic(int Layout, bool Satf) const {
    constexpr int NumLayouts = 4;
    if (Layout < 0 || Layout >= NumLayouts)
      return 0;
    // Non-satf variants occupy slots 0..3, satf variants slots 4..7.
    const unsigned Index =
        static_cast<unsigned>(Layout) + (Satf ? NumLayouts : 0);
    return Variants[Index];
  }
};
as A,B,C,D.19970case NVPTX::BI__hmma_m16n16k16_mma_f16f16:19971return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}};19972case NVPTX::BI__hmma_m16n16k16_mma_f32f16:19973return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}};19974case NVPTX::BI__hmma_m16n16k16_mma_f16f32:19975return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}};19976case NVPTX::BI__hmma_m16n16k16_mma_f32f32:19977return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}};19978case NVPTX::BI__hmma_m32n8k16_mma_f16f16:19979return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}};19980case NVPTX::BI__hmma_m32n8k16_mma_f32f16:19981return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}};19982case NVPTX::BI__hmma_m32n8k16_mma_f16f32:19983return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}};19984case NVPTX::BI__hmma_m32n8k16_mma_f32f32:19985return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}};19986case NVPTX::BI__hmma_m8n32k16_mma_f16f16:19987return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}};19988case NVPTX::BI__hmma_m8n32k16_mma_f32f16:19989return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}};19990case NVPTX::BI__hmma_m8n32k16_mma_f16f32:19991return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}};19992case NVPTX::BI__hmma_m8n32k16_mma_f32f32:19993return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}};1999419995// Integer MMA19996case NVPTX::BI__imma_m16n16k16_mma_s8:19997return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}};19998case NVPTX::BI__imma_m16n16k16_mma_u8:19999return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}};20000case NVPTX::BI__imma_m32n8k16_mma_s8:20001return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}};20002case NVPTX::BI__imma_m32n8k16_mma_u8:20003return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}};20004case NVPTX::BI__imma_m8n32k16_mma_s8:20005return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}};20006case NVPTX::BI__imma_m8n32k16_mma_u8:20007return {1, 4, 8, 8, 
{{MMA_SATF_VARIANTS(m8n32k16, u8)}}};2000820009// Sub-integer MMA20010case NVPTX::BI__imma_m8n8k32_mma_s4:20011return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}};20012case NVPTX::BI__imma_m8n8k32_mma_u4:20013return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}};20014case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:20015return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}};20016case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:20017return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}};2001820019// Double MMA20020case NVPTX::BI__dmma_m8n8k4_mma_f64:20021return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}};2002220023// Alternate FP MMA20024case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:20025return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}};20026case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:20027return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}};20028case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:20029return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}};20030case NVPTX::BI__mma_tf32_m16n16k8_mma_f32:20031return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}};20032default:20033llvm_unreachable("Unexpected builtin ID.");20034}20035#undef MMA_VARIANTS20036#undef MMA_SATF_VARIANTS20037#undef MMA_VARIANTS_I420038#undef MMA_VARIANTS_B1_AND20039#undef MMA_VARIANTS_B1_XOR20040}2004120042static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,20043const CallExpr *E) {20044Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));20045QualType ArgType = E->getArg(0)->getType();20046clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);20047llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());20048return CGF.Builder.CreateCall(20049CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),20050{Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});20051}2005220053static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,20054const CallExpr *E) {20055Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));20056llvm::Type 
*ElemTy =20057CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());20058return CGF.Builder.CreateCall(20059CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),20060{Ptr, CGF.EmitScalarExpr(E->getArg(1))});20061}2006220063static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS,20064CodeGenFunction &CGF, const CallExpr *E,20065int SrcSize) {20066return E->getNumArgs() == 320067? CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS),20068{CGF.EmitScalarExpr(E->getArg(0)),20069CGF.EmitScalarExpr(E->getArg(1)),20070CGF.EmitScalarExpr(E->getArg(2))})20071: CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID),20072{CGF.EmitScalarExpr(E->getArg(0)),20073CGF.EmitScalarExpr(E->getArg(1))});20074}2007520076static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,20077const CallExpr *E, CodeGenFunction &CGF) {20078auto &C = CGF.CGM.getContext();20079if (!(C.getLangOpts().NativeHalfType ||20080!C.getTargetInfo().useFP16ConversionIntrinsics())) {20081CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +20082" requires native half type support.");20083return nullptr;20084}2008520086if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||20087IntrinsicID == Intrinsic::nvvm_ldu_global_f)20088return MakeLdgLdu(IntrinsicID, CGF, E);2008920090SmallVector<Value *, 16> Args;20091auto *F = CGF.CGM.getIntrinsic(IntrinsicID);20092auto *FTy = F->getFunctionType();20093unsigned ICEArguments = 0;20094ASTContext::GetBuiltinTypeError Error;20095C.GetBuiltinType(BuiltinID, Error, &ICEArguments);20096assert(Error == ASTContext::GE_None && "Should not codegen an error");20097for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {20098assert((ICEArguments & (1 << i)) == 0);20099auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));20100auto *PTy = FTy->getParamType(i);20101if (PTy != ArgValue->getType())20102ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);20103Args.push_back(ArgValue);20104}2010520106return CGF.Builder.CreateCall(F, 
Args);20107}20108} // namespace2010920110Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,20111const CallExpr *E) {20112switch (BuiltinID) {20113case NVPTX::BI__nvvm_atom_add_gen_i:20114case NVPTX::BI__nvvm_atom_add_gen_l:20115case NVPTX::BI__nvvm_atom_add_gen_ll:20116return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);2011720118case NVPTX::BI__nvvm_atom_sub_gen_i:20119case NVPTX::BI__nvvm_atom_sub_gen_l:20120case NVPTX::BI__nvvm_atom_sub_gen_ll:20121return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);2012220123case NVPTX::BI__nvvm_atom_and_gen_i:20124case NVPTX::BI__nvvm_atom_and_gen_l:20125case NVPTX::BI__nvvm_atom_and_gen_ll:20126return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);2012720128case NVPTX::BI__nvvm_atom_or_gen_i:20129case NVPTX::BI__nvvm_atom_or_gen_l:20130case NVPTX::BI__nvvm_atom_or_gen_ll:20131return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);2013220133case NVPTX::BI__nvvm_atom_xor_gen_i:20134case NVPTX::BI__nvvm_atom_xor_gen_l:20135case NVPTX::BI__nvvm_atom_xor_gen_ll:20136return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);2013720138case NVPTX::BI__nvvm_atom_xchg_gen_i:20139case NVPTX::BI__nvvm_atom_xchg_gen_l:20140case NVPTX::BI__nvvm_atom_xchg_gen_ll:20141return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);2014220143case NVPTX::BI__nvvm_atom_max_gen_i:20144case NVPTX::BI__nvvm_atom_max_gen_l:20145case NVPTX::BI__nvvm_atom_max_gen_ll:20146return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);2014720148case NVPTX::BI__nvvm_atom_max_gen_ui:20149case NVPTX::BI__nvvm_atom_max_gen_ul:20150case NVPTX::BI__nvvm_atom_max_gen_ull:20151return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);2015220153case NVPTX::BI__nvvm_atom_min_gen_i:20154case NVPTX::BI__nvvm_atom_min_gen_l:20155case NVPTX::BI__nvvm_atom_min_gen_ll:20156return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);2015720158case 
NVPTX::BI__nvvm_atom_min_gen_ui:20159case NVPTX::BI__nvvm_atom_min_gen_ul:20160case NVPTX::BI__nvvm_atom_min_gen_ull:20161return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);2016220163case NVPTX::BI__nvvm_atom_cas_gen_i:20164case NVPTX::BI__nvvm_atom_cas_gen_l:20165case NVPTX::BI__nvvm_atom_cas_gen_ll:20166// __nvvm_atom_cas_gen_* should return the old value rather than the20167// success flag.20168return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);2016920170case NVPTX::BI__nvvm_atom_add_gen_f:20171case NVPTX::BI__nvvm_atom_add_gen_d: {20172Address DestAddr = EmitPointerWithAlignment(E->getArg(0));20173Value *Val = EmitScalarExpr(E->getArg(1));2017420175return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,20176AtomicOrdering::SequentiallyConsistent);20177}2017820179case NVPTX::BI__nvvm_atom_inc_gen_ui: {20180Value *Ptr = EmitScalarExpr(E->getArg(0));20181Value *Val = EmitScalarExpr(E->getArg(1));20182Function *FnALI32 =20183CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());20184return Builder.CreateCall(FnALI32, {Ptr, Val});20185}2018620187case NVPTX::BI__nvvm_atom_dec_gen_ui: {20188Value *Ptr = EmitScalarExpr(E->getArg(0));20189Value *Val = EmitScalarExpr(E->getArg(1));20190Function *FnALD32 =20191CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());20192return Builder.CreateCall(FnALD32, {Ptr, Val});20193}2019420195case NVPTX::BI__nvvm_ldg_c:20196case NVPTX::BI__nvvm_ldg_sc:20197case NVPTX::BI__nvvm_ldg_c2:20198case NVPTX::BI__nvvm_ldg_sc2:20199case NVPTX::BI__nvvm_ldg_c4:20200case NVPTX::BI__nvvm_ldg_sc4:20201case NVPTX::BI__nvvm_ldg_s:20202case NVPTX::BI__nvvm_ldg_s2:20203case NVPTX::BI__nvvm_ldg_s4:20204case NVPTX::BI__nvvm_ldg_i:20205case NVPTX::BI__nvvm_ldg_i2:20206case NVPTX::BI__nvvm_ldg_i4:20207case NVPTX::BI__nvvm_ldg_l:20208case NVPTX::BI__nvvm_ldg_l2:20209case NVPTX::BI__nvvm_ldg_ll:20210case NVPTX::BI__nvvm_ldg_ll2:20211case NVPTX::BI__nvvm_ldg_uc:20212case 
NVPTX::BI__nvvm_ldg_uc2:20213case NVPTX::BI__nvvm_ldg_uc4:20214case NVPTX::BI__nvvm_ldg_us:20215case NVPTX::BI__nvvm_ldg_us2:20216case NVPTX::BI__nvvm_ldg_us4:20217case NVPTX::BI__nvvm_ldg_ui:20218case NVPTX::BI__nvvm_ldg_ui2:20219case NVPTX::BI__nvvm_ldg_ui4:20220case NVPTX::BI__nvvm_ldg_ul:20221case NVPTX::BI__nvvm_ldg_ul2:20222case NVPTX::BI__nvvm_ldg_ull:20223case NVPTX::BI__nvvm_ldg_ull2:20224// PTX Interoperability section 2.2: "For a vector with an even number of20225// elements, its alignment is set to number of elements times the alignment20226// of its member: n*alignof(t)."20227return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);20228case NVPTX::BI__nvvm_ldg_f:20229case NVPTX::BI__nvvm_ldg_f2:20230case NVPTX::BI__nvvm_ldg_f4:20231case NVPTX::BI__nvvm_ldg_d:20232case NVPTX::BI__nvvm_ldg_d2:20233return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);2023420235case NVPTX::BI__nvvm_ldu_c:20236case NVPTX::BI__nvvm_ldu_sc:20237case NVPTX::BI__nvvm_ldu_c2:20238case NVPTX::BI__nvvm_ldu_sc2:20239case NVPTX::BI__nvvm_ldu_c4:20240case NVPTX::BI__nvvm_ldu_sc4:20241case NVPTX::BI__nvvm_ldu_s:20242case NVPTX::BI__nvvm_ldu_s2:20243case NVPTX::BI__nvvm_ldu_s4:20244case NVPTX::BI__nvvm_ldu_i:20245case NVPTX::BI__nvvm_ldu_i2:20246case NVPTX::BI__nvvm_ldu_i4:20247case NVPTX::BI__nvvm_ldu_l:20248case NVPTX::BI__nvvm_ldu_l2:20249case NVPTX::BI__nvvm_ldu_ll:20250case NVPTX::BI__nvvm_ldu_ll2:20251case NVPTX::BI__nvvm_ldu_uc:20252case NVPTX::BI__nvvm_ldu_uc2:20253case NVPTX::BI__nvvm_ldu_uc4:20254case NVPTX::BI__nvvm_ldu_us:20255case NVPTX::BI__nvvm_ldu_us2:20256case NVPTX::BI__nvvm_ldu_us4:20257case NVPTX::BI__nvvm_ldu_ui:20258case NVPTX::BI__nvvm_ldu_ui2:20259case NVPTX::BI__nvvm_ldu_ui4:20260case NVPTX::BI__nvvm_ldu_ul:20261case NVPTX::BI__nvvm_ldu_ul2:20262case NVPTX::BI__nvvm_ldu_ull:20263case NVPTX::BI__nvvm_ldu_ull2:20264return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);20265case NVPTX::BI__nvvm_ldu_f:20266case NVPTX::BI__nvvm_ldu_f2:20267case 
NVPTX::BI__nvvm_ldu_f4:20268case NVPTX::BI__nvvm_ldu_d:20269case NVPTX::BI__nvvm_ldu_d2:20270return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);2027120272case NVPTX::BI__nvvm_atom_cta_add_gen_i:20273case NVPTX::BI__nvvm_atom_cta_add_gen_l:20274case NVPTX::BI__nvvm_atom_cta_add_gen_ll:20275return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);20276case NVPTX::BI__nvvm_atom_sys_add_gen_i:20277case NVPTX::BI__nvvm_atom_sys_add_gen_l:20278case NVPTX::BI__nvvm_atom_sys_add_gen_ll:20279return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);20280case NVPTX::BI__nvvm_atom_cta_add_gen_f:20281case NVPTX::BI__nvvm_atom_cta_add_gen_d:20282return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);20283case NVPTX::BI__nvvm_atom_sys_add_gen_f:20284case NVPTX::BI__nvvm_atom_sys_add_gen_d:20285return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);20286case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:20287case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:20288case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:20289return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);20290case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:20291case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:20292case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:20293return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);20294case NVPTX::BI__nvvm_atom_cta_max_gen_i:20295case NVPTX::BI__nvvm_atom_cta_max_gen_ui:20296case NVPTX::BI__nvvm_atom_cta_max_gen_l:20297case NVPTX::BI__nvvm_atom_cta_max_gen_ul:20298case NVPTX::BI__nvvm_atom_cta_max_gen_ll:20299case NVPTX::BI__nvvm_atom_cta_max_gen_ull:20300return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);20301case NVPTX::BI__nvvm_atom_sys_max_gen_i:20302case NVPTX::BI__nvvm_atom_sys_max_gen_ui:20303case NVPTX::BI__nvvm_atom_sys_max_gen_l:20304case NVPTX::BI__nvvm_atom_sys_max_gen_ul:20305case NVPTX::BI__nvvm_atom_sys_max_gen_ll:20306case NVPTX::BI__nvvm_atom_sys_max_gen_ull:20307return 
MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);20308case NVPTX::BI__nvvm_atom_cta_min_gen_i:20309case NVPTX::BI__nvvm_atom_cta_min_gen_ui:20310case NVPTX::BI__nvvm_atom_cta_min_gen_l:20311case NVPTX::BI__nvvm_atom_cta_min_gen_ul:20312case NVPTX::BI__nvvm_atom_cta_min_gen_ll:20313case NVPTX::BI__nvvm_atom_cta_min_gen_ull:20314return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);20315case NVPTX::BI__nvvm_atom_sys_min_gen_i:20316case NVPTX::BI__nvvm_atom_sys_min_gen_ui:20317case NVPTX::BI__nvvm_atom_sys_min_gen_l:20318case NVPTX::BI__nvvm_atom_sys_min_gen_ul:20319case NVPTX::BI__nvvm_atom_sys_min_gen_ll:20320case NVPTX::BI__nvvm_atom_sys_min_gen_ull:20321return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);20322case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:20323return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);20324case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:20325return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);20326case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:20327return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);20328case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:20329return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);20330case NVPTX::BI__nvvm_atom_cta_and_gen_i:20331case NVPTX::BI__nvvm_atom_cta_and_gen_l:20332case NVPTX::BI__nvvm_atom_cta_and_gen_ll:20333return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);20334case NVPTX::BI__nvvm_atom_sys_and_gen_i:20335case NVPTX::BI__nvvm_atom_sys_and_gen_l:20336case NVPTX::BI__nvvm_atom_sys_and_gen_ll:20337return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);20338case NVPTX::BI__nvvm_atom_cta_or_gen_i:20339case NVPTX::BI__nvvm_atom_cta_or_gen_l:20340case NVPTX::BI__nvvm_atom_cta_or_gen_ll:20341return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);20342case NVPTX::BI__nvvm_atom_sys_or_gen_i:20343case NVPTX::BI__nvvm_atom_sys_or_gen_l:20344case 
NVPTX::BI__nvvm_atom_sys_or_gen_ll:20345return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);20346case NVPTX::BI__nvvm_atom_cta_xor_gen_i:20347case NVPTX::BI__nvvm_atom_cta_xor_gen_l:20348case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:20349return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);20350case NVPTX::BI__nvvm_atom_sys_xor_gen_i:20351case NVPTX::BI__nvvm_atom_sys_xor_gen_l:20352case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:20353return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);20354case NVPTX::BI__nvvm_atom_cta_cas_gen_i:20355case NVPTX::BI__nvvm_atom_cta_cas_gen_l:20356case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {20357Value *Ptr = EmitScalarExpr(E->getArg(0));20358llvm::Type *ElemTy =20359ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());20360return Builder.CreateCall(20361CGM.getIntrinsic(20362Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}),20363{Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});20364}20365case NVPTX::BI__nvvm_atom_sys_cas_gen_i:20366case NVPTX::BI__nvvm_atom_sys_cas_gen_l:20367case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {20368Value *Ptr = EmitScalarExpr(E->getArg(0));20369llvm::Type *ElemTy =20370ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());20371return Builder.CreateCall(20372CGM.getIntrinsic(20373Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}),20374{Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});20375}20376case NVPTX::BI__nvvm_match_all_sync_i32p:20377case NVPTX::BI__nvvm_match_all_sync_i64p: {20378Value *Mask = EmitScalarExpr(E->getArg(0));20379Value *Val = EmitScalarExpr(E->getArg(1));20380Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));20381Value *ResultPair = Builder.CreateCall(20382CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p20383? 
Intrinsic::nvvm_match_all_sync_i32p20384: Intrinsic::nvvm_match_all_sync_i64p),20385{Mask, Val});20386Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),20387PredOutPtr.getElementType());20388Builder.CreateStore(Pred, PredOutPtr);20389return Builder.CreateExtractValue(ResultPair, 0);20390}2039120392// FP MMA loads20393case NVPTX::BI__hmma_m16n16k16_ld_a:20394case NVPTX::BI__hmma_m16n16k16_ld_b:20395case NVPTX::BI__hmma_m16n16k16_ld_c_f16:20396case NVPTX::BI__hmma_m16n16k16_ld_c_f32:20397case NVPTX::BI__hmma_m32n8k16_ld_a:20398case NVPTX::BI__hmma_m32n8k16_ld_b:20399case NVPTX::BI__hmma_m32n8k16_ld_c_f16:20400case NVPTX::BI__hmma_m32n8k16_ld_c_f32:20401case NVPTX::BI__hmma_m8n32k16_ld_a:20402case NVPTX::BI__hmma_m8n32k16_ld_b:20403case NVPTX::BI__hmma_m8n32k16_ld_c_f16:20404case NVPTX::BI__hmma_m8n32k16_ld_c_f32:20405// Integer MMA loads.20406case NVPTX::BI__imma_m16n16k16_ld_a_s8:20407case NVPTX::BI__imma_m16n16k16_ld_a_u8:20408case NVPTX::BI__imma_m16n16k16_ld_b_s8:20409case NVPTX::BI__imma_m16n16k16_ld_b_u8:20410case NVPTX::BI__imma_m16n16k16_ld_c:20411case NVPTX::BI__imma_m32n8k16_ld_a_s8:20412case NVPTX::BI__imma_m32n8k16_ld_a_u8:20413case NVPTX::BI__imma_m32n8k16_ld_b_s8:20414case NVPTX::BI__imma_m32n8k16_ld_b_u8:20415case NVPTX::BI__imma_m32n8k16_ld_c:20416case NVPTX::BI__imma_m8n32k16_ld_a_s8:20417case NVPTX::BI__imma_m8n32k16_ld_a_u8:20418case NVPTX::BI__imma_m8n32k16_ld_b_s8:20419case NVPTX::BI__imma_m8n32k16_ld_b_u8:20420case NVPTX::BI__imma_m8n32k16_ld_c:20421// Sub-integer MMA loads.20422case NVPTX::BI__imma_m8n8k32_ld_a_s4:20423case NVPTX::BI__imma_m8n8k32_ld_a_u4:20424case NVPTX::BI__imma_m8n8k32_ld_b_s4:20425case NVPTX::BI__imma_m8n8k32_ld_b_u4:20426case NVPTX::BI__imma_m8n8k32_ld_c:20427case NVPTX::BI__bmma_m8n8k128_ld_a_b1:20428case NVPTX::BI__bmma_m8n8k128_ld_b_b1:20429case NVPTX::BI__bmma_m8n8k128_ld_c:20430// Double MMA loads.20431case NVPTX::BI__dmma_m8n8k4_ld_a:20432case NVPTX::BI__dmma_m8n8k4_ld_b:20433case 
NVPTX::BI__dmma_m8n8k4_ld_c:20434// Alternate float MMA loads.20435case NVPTX::BI__mma_bf16_m16n16k16_ld_a:20436case NVPTX::BI__mma_bf16_m16n16k16_ld_b:20437case NVPTX::BI__mma_bf16_m8n32k16_ld_a:20438case NVPTX::BI__mma_bf16_m8n32k16_ld_b:20439case NVPTX::BI__mma_bf16_m32n8k16_ld_a:20440case NVPTX::BI__mma_bf16_m32n8k16_ld_b:20441case NVPTX::BI__mma_tf32_m16n16k8_ld_a:20442case NVPTX::BI__mma_tf32_m16n16k8_ld_b:20443case NVPTX::BI__mma_tf32_m16n16k8_ld_c: {20444Address Dst = EmitPointerWithAlignment(E->getArg(0));20445Value *Src = EmitScalarExpr(E->getArg(1));20446Value *Ldm = EmitScalarExpr(E->getArg(2));20447std::optional<llvm::APSInt> isColMajorArg =20448E->getArg(3)->getIntegerConstantExpr(getContext());20449if (!isColMajorArg)20450return nullptr;20451bool isColMajor = isColMajorArg->getSExtValue();20452NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);20453unsigned IID = isColMajor ? II.IID_col : II.IID_row;20454if (IID == 0)20455return nullptr;2045620457Value *Result =20458Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});2045920460// Save returned values.20461assert(II.NumResults);20462if (II.NumResults == 1) {20463Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this),20464CharUnits::fromQuantity(4));20465} else {20466for (unsigned i = 0; i < II.NumResults; ++i) {20467Builder.CreateAlignedStore(20468Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),20469Dst.getElementType()),20470Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),20471llvm::ConstantInt::get(IntTy, i)),20472CharUnits::fromQuantity(4));20473}20474}20475return Result;20476}2047720478case NVPTX::BI__hmma_m16n16k16_st_c_f16:20479case NVPTX::BI__hmma_m16n16k16_st_c_f32:20480case NVPTX::BI__hmma_m32n8k16_st_c_f16:20481case NVPTX::BI__hmma_m32n8k16_st_c_f32:20482case NVPTX::BI__hmma_m8n32k16_st_c_f16:20483case NVPTX::BI__hmma_m8n32k16_st_c_f32:20484case NVPTX::BI__imma_m16n16k16_st_c_i32:20485case NVPTX::BI__imma_m32n8k16_st_c_i32:20486case 
NVPTX::BI__imma_m8n32k16_st_c_i32:20487case NVPTX::BI__imma_m8n8k32_st_c_i32:20488case NVPTX::BI__bmma_m8n8k128_st_c_i32:20489case NVPTX::BI__dmma_m8n8k4_st_c_f64:20490case NVPTX::BI__mma_m16n16k8_st_c_f32: {20491Value *Dst = EmitScalarExpr(E->getArg(0));20492Address Src = EmitPointerWithAlignment(E->getArg(1));20493Value *Ldm = EmitScalarExpr(E->getArg(2));20494std::optional<llvm::APSInt> isColMajorArg =20495E->getArg(3)->getIntegerConstantExpr(getContext());20496if (!isColMajorArg)20497return nullptr;20498bool isColMajor = isColMajorArg->getSExtValue();20499NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);20500unsigned IID = isColMajor ? II.IID_col : II.IID_row;20501if (IID == 0)20502return nullptr;20503Function *Intrinsic =20504CGM.getIntrinsic(IID, Dst->getType());20505llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);20506SmallVector<Value *, 10> Values = {Dst};20507for (unsigned i = 0; i < II.NumResults; ++i) {20508Value *V = Builder.CreateAlignedLoad(20509Src.getElementType(),20510Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this),20511llvm::ConstantInt::get(IntTy, i)),20512CharUnits::fromQuantity(4));20513Values.push_back(Builder.CreateBitCast(V, ParamType));20514}20515Values.push_back(Ldm);20516Value *Result = Builder.CreateCall(Intrinsic, Values);20517return Result;20518}2051920520// BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->20521// Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>20522case NVPTX::BI__hmma_m16n16k16_mma_f16f16:20523case NVPTX::BI__hmma_m16n16k16_mma_f32f16:20524case NVPTX::BI__hmma_m16n16k16_mma_f32f32:20525case NVPTX::BI__hmma_m16n16k16_mma_f16f32:20526case NVPTX::BI__hmma_m32n8k16_mma_f16f16:20527case NVPTX::BI__hmma_m32n8k16_mma_f32f16:20528case NVPTX::BI__hmma_m32n8k16_mma_f32f32:20529case NVPTX::BI__hmma_m32n8k16_mma_f16f32:20530case NVPTX::BI__hmma_m8n32k16_mma_f16f16:20531case NVPTX::BI__hmma_m8n32k16_mma_f32f16:20532case 
NVPTX::BI__hmma_m8n32k16_mma_f32f32:20533case NVPTX::BI__hmma_m8n32k16_mma_f16f32:20534case NVPTX::BI__imma_m16n16k16_mma_s8:20535case NVPTX::BI__imma_m16n16k16_mma_u8:20536case NVPTX::BI__imma_m32n8k16_mma_s8:20537case NVPTX::BI__imma_m32n8k16_mma_u8:20538case NVPTX::BI__imma_m8n32k16_mma_s8:20539case NVPTX::BI__imma_m8n32k16_mma_u8:20540case NVPTX::BI__imma_m8n8k32_mma_s4:20541case NVPTX::BI__imma_m8n8k32_mma_u4:20542case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:20543case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1:20544case NVPTX::BI__dmma_m8n8k4_mma_f64:20545case NVPTX::BI__mma_bf16_m16n16k16_mma_f32:20546case NVPTX::BI__mma_bf16_m8n32k16_mma_f32:20547case NVPTX::BI__mma_bf16_m32n8k16_mma_f32:20548case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: {20549Address Dst = EmitPointerWithAlignment(E->getArg(0));20550Address SrcA = EmitPointerWithAlignment(E->getArg(1));20551Address SrcB = EmitPointerWithAlignment(E->getArg(2));20552Address SrcC = EmitPointerWithAlignment(E->getArg(3));20553std::optional<llvm::APSInt> LayoutArg =20554E->getArg(4)->getIntegerConstantExpr(getContext());20555if (!LayoutArg)20556return nullptr;20557int Layout = LayoutArg->getSExtValue();20558if (Layout < 0 || Layout > 3)20559return nullptr;20560llvm::APSInt SatfArg;20561if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 ||20562BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1)20563SatfArg = 0; // .b1 does not have satf argument.20564else if (std::optional<llvm::APSInt> OptSatfArg =20565E->getArg(5)->getIntegerConstantExpr(getContext()))20566SatfArg = *OptSatfArg;20567else20568return nullptr;20569bool Satf = SatfArg.getSExtValue();20570NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);20571unsigned IID = MI.getMMAIntrinsic(Layout, Satf);20572if (IID == 0) // Unsupported combination of Layout/Satf.20573return nullptr;2057420575SmallVector<Value *, 24> Values;20576Function *Intrinsic = CGM.getIntrinsic(IID);20577llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);20578// Load 
A20579for (unsigned i = 0; i < MI.NumEltsA; ++i) {20580Value *V = Builder.CreateAlignedLoad(20581SrcA.getElementType(),20582Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this),20583llvm::ConstantInt::get(IntTy, i)),20584CharUnits::fromQuantity(4));20585Values.push_back(Builder.CreateBitCast(V, AType));20586}20587// Load B20588llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);20589for (unsigned i = 0; i < MI.NumEltsB; ++i) {20590Value *V = Builder.CreateAlignedLoad(20591SrcB.getElementType(),20592Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this),20593llvm::ConstantInt::get(IntTy, i)),20594CharUnits::fromQuantity(4));20595Values.push_back(Builder.CreateBitCast(V, BType));20596}20597// Load C20598llvm::Type *CType =20599Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);20600for (unsigned i = 0; i < MI.NumEltsC; ++i) {20601Value *V = Builder.CreateAlignedLoad(20602SrcC.getElementType(),20603Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this),20604llvm::ConstantInt::get(IntTy, i)),20605CharUnits::fromQuantity(4));20606Values.push_back(Builder.CreateBitCast(V, CType));20607}20608Value *Result = Builder.CreateCall(Intrinsic, Values);20609llvm::Type *DType = Dst.getElementType();20610for (unsigned i = 0; i < MI.NumEltsD; ++i)20611Builder.CreateAlignedStore(20612Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),20613Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this),20614llvm::ConstantInt::get(IntTy, i)),20615CharUnits::fromQuantity(4));20616return Result;20617}20618// The following builtins require half type support20619case NVPTX::BI__nvvm_ex2_approx_f16:20620return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);20621case NVPTX::BI__nvvm_ex2_approx_f16x2:20622return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);20623case NVPTX::BI__nvvm_ff2f16x2_rn:20624return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, 
BuiltinID, E, *this);20625case NVPTX::BI__nvvm_ff2f16x2_rn_relu:20626return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);20627case NVPTX::BI__nvvm_ff2f16x2_rz:20628return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);20629case NVPTX::BI__nvvm_ff2f16x2_rz_relu:20630return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);20631case NVPTX::BI__nvvm_fma_rn_f16:20632return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);20633case NVPTX::BI__nvvm_fma_rn_f16x2:20634return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);20635case NVPTX::BI__nvvm_fma_rn_ftz_f16:20636return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);20637case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:20638return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);20639case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:20640return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,20641*this);20642case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:20643return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,20644*this);20645case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:20646return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,20647*this);20648case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:20649return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,20650*this);20651case NVPTX::BI__nvvm_fma_rn_relu_f16:20652return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);20653case NVPTX::BI__nvvm_fma_rn_relu_f16x2:20654return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);20655case NVPTX::BI__nvvm_fma_rn_sat_f16:20656return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);20657case NVPTX::BI__nvvm_fma_rn_sat_f16x2:20658return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);20659case NVPTX::BI__nvvm_fmax_f16:20660return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);20661case 
NVPTX::BI__nvvm_fmax_f16x2:20662return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);20663case NVPTX::BI__nvvm_fmax_ftz_f16:20664return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);20665case NVPTX::BI__nvvm_fmax_ftz_f16x2:20666return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);20667case NVPTX::BI__nvvm_fmax_ftz_nan_f16:20668return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);20669case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:20670return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,20671*this);20672case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:20673return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,20674E, *this);20675case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:20676return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,20677BuiltinID, E, *this);20678case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:20679return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,20680*this);20681case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:20682return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,20683E, *this);20684case NVPTX::BI__nvvm_fmax_nan_f16:20685return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);20686case NVPTX::BI__nvvm_fmax_nan_f16x2:20687return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);20688case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:20689return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,20690*this);20691case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:20692return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,20693E, *this);20694case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:20695return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,20696*this);20697case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:20698return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,20699*this);20700case 
NVPTX::BI__nvvm_fmin_f16:20701return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);20702case NVPTX::BI__nvvm_fmin_f16x2:20703return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);20704case NVPTX::BI__nvvm_fmin_ftz_f16:20705return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);20706case NVPTX::BI__nvvm_fmin_ftz_f16x2:20707return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);20708case NVPTX::BI__nvvm_fmin_ftz_nan_f16:20709return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);20710case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:20711return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,20712*this);20713case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:20714return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,20715E, *this);20716case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:20717return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,20718BuiltinID, E, *this);20719case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:20720return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,20721*this);20722case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:20723return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,20724E, *this);20725case NVPTX::BI__nvvm_fmin_nan_f16:20726return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);20727case NVPTX::BI__nvvm_fmin_nan_f16x2:20728return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);20729case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:20730return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,20731*this);20732case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:20733return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,20734E, *this);20735case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:20736return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,20737*this);20738case 
NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:20739return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,20740*this);20741case NVPTX::BI__nvvm_ldg_h:20742return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);20743case NVPTX::BI__nvvm_ldg_h2:20744return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);20745case NVPTX::BI__nvvm_ldu_h:20746return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);20747case NVPTX::BI__nvvm_ldu_h2: {20748return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);20749}20750case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:20751return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,20752Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,207534);20754case NVPTX::BI__nvvm_cp_async_ca_shared_global_8:20755return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8,20756Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E,207578);20758case NVPTX::BI__nvvm_cp_async_ca_shared_global_16:20759return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16,20760Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E,2076116);20762case NVPTX::BI__nvvm_cp_async_cg_shared_global_16:20763return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16,20764Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E,2076516);20766case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x:20767return Builder.CreateCall(20768CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x));20769case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y:20770return Builder.CreateCall(20771CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y));20772case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z:20773return Builder.CreateCall(20774CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z));20775case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w:20776return Builder.CreateCall(20777CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w));20778case 
NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x:20779return Builder.CreateCall(20780CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x));20781case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y:20782return Builder.CreateCall(20783CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y));20784case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z:20785return Builder.CreateCall(20786CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z));20787case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w:20788return Builder.CreateCall(20789CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w));20790case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x:20791return Builder.CreateCall(20792CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x));20793case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y:20794return Builder.CreateCall(20795CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y));20796case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z:20797return Builder.CreateCall(20798CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z));20799case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w:20800return Builder.CreateCall(20801CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w));20802case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x:20803return Builder.CreateCall(20804CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x));20805case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y:20806return Builder.CreateCall(20807CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y));20808case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z:20809return Builder.CreateCall(20810CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z));20811case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w:20812return Builder.CreateCall(20813CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w));20814case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank:20815return 
Builder.CreateCall(20816CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank));20817case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank:20818return Builder.CreateCall(20819CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank));20820case NVPTX::BI__nvvm_is_explicit_cluster:20821return Builder.CreateCall(20822CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster));20823case NVPTX::BI__nvvm_isspacep_shared_cluster:20824return Builder.CreateCall(20825CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster),20826EmitScalarExpr(E->getArg(0)));20827case NVPTX::BI__nvvm_mapa:20828return Builder.CreateCall(20829CGM.getIntrinsic(Intrinsic::nvvm_mapa),20830{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});20831case NVPTX::BI__nvvm_mapa_shared_cluster:20832return Builder.CreateCall(20833CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster),20834{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});20835case NVPTX::BI__nvvm_getctarank:20836return Builder.CreateCall(20837CGM.getIntrinsic(Intrinsic::nvvm_getctarank),20838EmitScalarExpr(E->getArg(0)));20839case NVPTX::BI__nvvm_getctarank_shared_cluster:20840return Builder.CreateCall(20841CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster),20842EmitScalarExpr(E->getArg(0)));20843case NVPTX::BI__nvvm_barrier_cluster_arrive:20844return Builder.CreateCall(20845CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive));20846case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed:20847return Builder.CreateCall(20848CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed));20849case NVPTX::BI__nvvm_barrier_cluster_wait:20850return Builder.CreateCall(20851CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait));20852case NVPTX::BI__nvvm_fence_sc_cluster:20853return Builder.CreateCall(20854CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster));20855default:20856return nullptr;20857}20858}2085920860namespace {20861struct BuiltinAlignArgs {20862llvm::Value *Src = nullptr;20863llvm::Type 
*SrcType = nullptr;20864llvm::Value *Alignment = nullptr;20865llvm::Value *Mask = nullptr;20866llvm::IntegerType *IntType = nullptr;2086720868BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) {20869QualType AstType = E->getArg(0)->getType();20870if (AstType->isArrayType())20871Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF);20872else20873Src = CGF.EmitScalarExpr(E->getArg(0));20874SrcType = Src->getType();20875if (SrcType->isPointerTy()) {20876IntType = IntegerType::get(20877CGF.getLLVMContext(),20878CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType));20879} else {20880assert(SrcType->isIntegerTy());20881IntType = cast<llvm::IntegerType>(SrcType);20882}20883Alignment = CGF.EmitScalarExpr(E->getArg(1));20884Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment");20885auto *One = llvm::ConstantInt::get(IntType, 1);20886Mask = CGF.Builder.CreateSub(Alignment, One, "mask");20887}20888};20889} // namespace2089020891/// Generate (x & (y-1)) == 0.20892RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) {20893BuiltinAlignArgs Args(E, *this);20894llvm::Value *SrcAddress = Args.Src;20895if (Args.SrcType->isPointerTy())20896SrcAddress =20897Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr");20898return RValue::get(Builder.CreateICmpEQ(20899Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"),20900llvm::Constant::getNullValue(Args.IntType), "is_aligned"));20901}2090220903/// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up.20904/// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the20905/// llvm.ptrmask intrinsic (with a GEP before in the align_up case).20906RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) {20907BuiltinAlignArgs Args(E, *this);20908llvm::Value *SrcForMask = Args.Src;20909if (AlignUp) {20910// When aligning up we have to first add the mask to ensure we go over the20911// next alignment value and then align down 
to the next valid multiple.20912// By adding the mask, we ensure that align_up on an already aligned20913// value will not change the value.20914if (Args.Src->getType()->isPointerTy()) {20915if (getLangOpts().isSignedOverflowDefined())20916SrcForMask =20917Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary");20918else20919SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask,20920/*SignedIndices=*/true,20921/*isSubtraction=*/false,20922E->getExprLoc(), "over_boundary");20923} else {20924SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary");20925}20926}20927// Invert the mask to only clear the lower bits.20928llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask");20929llvm::Value *Result = nullptr;20930if (Args.Src->getType()->isPointerTy()) {20931Result = Builder.CreateIntrinsic(20932Intrinsic::ptrmask, {Args.SrcType, Args.IntType},20933{SrcForMask, InvertedMask}, nullptr, "aligned_result");20934} else {20935Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");20936}20937assert(Result->getType() == Args.SrcType);20938return RValue::get(Result);20939}2094020941Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,20942const CallExpr *E) {20943switch (BuiltinID) {20944case WebAssembly::BI__builtin_wasm_memory_size: {20945llvm::Type *ResultType = ConvertType(E->getType());20946Value *I = EmitScalarExpr(E->getArg(0));20947Function *Callee =20948CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);20949return Builder.CreateCall(Callee, I);20950}20951case WebAssembly::BI__builtin_wasm_memory_grow: {20952llvm::Type *ResultType = ConvertType(E->getType());20953Value *Args[] = {EmitScalarExpr(E->getArg(0)),20954EmitScalarExpr(E->getArg(1))};20955Function *Callee =20956CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);20957return Builder.CreateCall(Callee, Args);20958}20959case WebAssembly::BI__builtin_wasm_tls_size: {20960llvm::Type *ResultType = 
ConvertType(E->getType());20961Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);20962return Builder.CreateCall(Callee);20963}20964case WebAssembly::BI__builtin_wasm_tls_align: {20965llvm::Type *ResultType = ConvertType(E->getType());20966Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType);20967return Builder.CreateCall(Callee);20968}20969case WebAssembly::BI__builtin_wasm_tls_base: {20970Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base);20971return Builder.CreateCall(Callee);20972}20973case WebAssembly::BI__builtin_wasm_throw: {20974Value *Tag = EmitScalarExpr(E->getArg(0));20975Value *Obj = EmitScalarExpr(E->getArg(1));20976Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);20977return Builder.CreateCall(Callee, {Tag, Obj});20978}20979case WebAssembly::BI__builtin_wasm_rethrow: {20980Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);20981return Builder.CreateCall(Callee);20982}20983case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: {20984Value *Addr = EmitScalarExpr(E->getArg(0));20985Value *Expected = EmitScalarExpr(E->getArg(1));20986Value *Timeout = EmitScalarExpr(E->getArg(2));20987Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32);20988return Builder.CreateCall(Callee, {Addr, Expected, Timeout});20989}20990case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: {20991Value *Addr = EmitScalarExpr(E->getArg(0));20992Value *Expected = EmitScalarExpr(E->getArg(1));20993Value *Timeout = EmitScalarExpr(E->getArg(2));20994Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64);20995return Builder.CreateCall(Callee, {Addr, Expected, Timeout});20996}20997case WebAssembly::BI__builtin_wasm_memory_atomic_notify: {20998Value *Addr = EmitScalarExpr(E->getArg(0));20999Value *Count = EmitScalarExpr(E->getArg(1));21000Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify);21001return Builder.CreateCall(Callee, {Addr, 
Count});21002}21003case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32:21004case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64:21005case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32:21006case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: {21007Value *Src = EmitScalarExpr(E->getArg(0));21008llvm::Type *ResT = ConvertType(E->getType());21009Function *Callee =21010CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()});21011return Builder.CreateCall(Callee, {Src});21012}21013case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32:21014case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64:21015case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32:21016case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: {21017Value *Src = EmitScalarExpr(E->getArg(0));21018llvm::Type *ResT = ConvertType(E->getType());21019Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned,21020{ResT, Src->getType()});21021return Builder.CreateCall(Callee, {Src});21022}21023case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:21024case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:21025case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:21026case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:21027case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {21028Value *Src = EmitScalarExpr(E->getArg(0));21029llvm::Type *ResT = ConvertType(E->getType());21030Function *Callee =21031CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()});21032return Builder.CreateCall(Callee, {Src});21033}21034case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:21035case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:21036case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:21037case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:21038case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {21039Value *Src = EmitScalarExpr(E->getArg(0));21040llvm::Type *ResT = ConvertType(E->getType());21041Function *Callee 
=21042CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()});21043return Builder.CreateCall(Callee, {Src});21044}21045case WebAssembly::BI__builtin_wasm_min_f32:21046case WebAssembly::BI__builtin_wasm_min_f64:21047case WebAssembly::BI__builtin_wasm_min_f16x8:21048case WebAssembly::BI__builtin_wasm_min_f32x4:21049case WebAssembly::BI__builtin_wasm_min_f64x2: {21050Value *LHS = EmitScalarExpr(E->getArg(0));21051Value *RHS = EmitScalarExpr(E->getArg(1));21052Function *Callee =21053CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType()));21054return Builder.CreateCall(Callee, {LHS, RHS});21055}21056case WebAssembly::BI__builtin_wasm_max_f32:21057case WebAssembly::BI__builtin_wasm_max_f64:21058case WebAssembly::BI__builtin_wasm_max_f16x8:21059case WebAssembly::BI__builtin_wasm_max_f32x4:21060case WebAssembly::BI__builtin_wasm_max_f64x2: {21061Value *LHS = EmitScalarExpr(E->getArg(0));21062Value *RHS = EmitScalarExpr(E->getArg(1));21063Function *Callee =21064CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType()));21065return Builder.CreateCall(Callee, {LHS, RHS});21066}21067case WebAssembly::BI__builtin_wasm_pmin_f16x8:21068case WebAssembly::BI__builtin_wasm_pmin_f32x4:21069case WebAssembly::BI__builtin_wasm_pmin_f64x2: {21070Value *LHS = EmitScalarExpr(E->getArg(0));21071Value *RHS = EmitScalarExpr(E->getArg(1));21072Function *Callee =21073CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));21074return Builder.CreateCall(Callee, {LHS, RHS});21075}21076case WebAssembly::BI__builtin_wasm_pmax_f16x8:21077case WebAssembly::BI__builtin_wasm_pmax_f32x4:21078case WebAssembly::BI__builtin_wasm_pmax_f64x2: {21079Value *LHS = EmitScalarExpr(E->getArg(0));21080Value *RHS = EmitScalarExpr(E->getArg(1));21081Function *Callee =21082CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));21083return Builder.CreateCall(Callee, {LHS, RHS});21084}21085case WebAssembly::BI__builtin_wasm_ceil_f32x4:21086case 
WebAssembly::BI__builtin_wasm_floor_f32x4:21087case WebAssembly::BI__builtin_wasm_trunc_f32x4:21088case WebAssembly::BI__builtin_wasm_nearest_f32x4:21089case WebAssembly::BI__builtin_wasm_ceil_f64x2:21090case WebAssembly::BI__builtin_wasm_floor_f64x2:21091case WebAssembly::BI__builtin_wasm_trunc_f64x2:21092case WebAssembly::BI__builtin_wasm_nearest_f64x2: {21093unsigned IntNo;21094switch (BuiltinID) {21095case WebAssembly::BI__builtin_wasm_ceil_f32x4:21096case WebAssembly::BI__builtin_wasm_ceil_f64x2:21097IntNo = Intrinsic::ceil;21098break;21099case WebAssembly::BI__builtin_wasm_floor_f32x4:21100case WebAssembly::BI__builtin_wasm_floor_f64x2:21101IntNo = Intrinsic::floor;21102break;21103case WebAssembly::BI__builtin_wasm_trunc_f32x4:21104case WebAssembly::BI__builtin_wasm_trunc_f64x2:21105IntNo = Intrinsic::trunc;21106break;21107case WebAssembly::BI__builtin_wasm_nearest_f32x4:21108case WebAssembly::BI__builtin_wasm_nearest_f64x2:21109IntNo = Intrinsic::nearbyint;21110break;21111default:21112llvm_unreachable("unexpected builtin ID");21113}21114Value *Value = EmitScalarExpr(E->getArg(0));21115Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));21116return Builder.CreateCall(Callee, Value);21117}21118case WebAssembly::BI__builtin_wasm_ref_null_extern: {21119Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern);21120return Builder.CreateCall(Callee);21121}21122case WebAssembly::BI__builtin_wasm_ref_null_func: {21123Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func);21124return Builder.CreateCall(Callee);21125}21126case WebAssembly::BI__builtin_wasm_swizzle_i8x16: {21127Value *Src = EmitScalarExpr(E->getArg(0));21128Value *Indices = EmitScalarExpr(E->getArg(1));21129Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle);21130return Builder.CreateCall(Callee, {Src, Indices});21131}21132case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:21133case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:21134case 
WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:21135case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:21136case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:21137case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:21138case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:21139case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: {21140unsigned IntNo;21141switch (BuiltinID) {21142case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16:21143case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8:21144IntNo = Intrinsic::sadd_sat;21145break;21146case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16:21147case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8:21148IntNo = Intrinsic::uadd_sat;21149break;21150case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16:21151case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8:21152IntNo = Intrinsic::wasm_sub_sat_signed;21153break;21154case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16:21155case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8:21156IntNo = Intrinsic::wasm_sub_sat_unsigned;21157break;21158default:21159llvm_unreachable("unexpected builtin ID");21160}21161Value *LHS = EmitScalarExpr(E->getArg(0));21162Value *RHS = EmitScalarExpr(E->getArg(1));21163Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));21164return Builder.CreateCall(Callee, {LHS, RHS});21165}21166case WebAssembly::BI__builtin_wasm_abs_i8x16:21167case WebAssembly::BI__builtin_wasm_abs_i16x8:21168case WebAssembly::BI__builtin_wasm_abs_i32x4:21169case WebAssembly::BI__builtin_wasm_abs_i64x2: {21170Value *Vec = EmitScalarExpr(E->getArg(0));21171Value *Neg = Builder.CreateNeg(Vec, "neg");21172Constant *Zero = llvm::Constant::getNullValue(Vec->getType());21173Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond");21174return Builder.CreateSelect(ICmp, Neg, Vec, "abs");21175}21176case WebAssembly::BI__builtin_wasm_min_s_i8x16:21177case WebAssembly::BI__builtin_wasm_min_u_i8x16:21178case WebAssembly::BI__builtin_wasm_max_s_i8x16:21179case 
WebAssembly::BI__builtin_wasm_max_u_i8x16:21180case WebAssembly::BI__builtin_wasm_min_s_i16x8:21181case WebAssembly::BI__builtin_wasm_min_u_i16x8:21182case WebAssembly::BI__builtin_wasm_max_s_i16x8:21183case WebAssembly::BI__builtin_wasm_max_u_i16x8:21184case WebAssembly::BI__builtin_wasm_min_s_i32x4:21185case WebAssembly::BI__builtin_wasm_min_u_i32x4:21186case WebAssembly::BI__builtin_wasm_max_s_i32x4:21187case WebAssembly::BI__builtin_wasm_max_u_i32x4: {21188Value *LHS = EmitScalarExpr(E->getArg(0));21189Value *RHS = EmitScalarExpr(E->getArg(1));21190Value *ICmp;21191switch (BuiltinID) {21192case WebAssembly::BI__builtin_wasm_min_s_i8x16:21193case WebAssembly::BI__builtin_wasm_min_s_i16x8:21194case WebAssembly::BI__builtin_wasm_min_s_i32x4:21195ICmp = Builder.CreateICmpSLT(LHS, RHS);21196break;21197case WebAssembly::BI__builtin_wasm_min_u_i8x16:21198case WebAssembly::BI__builtin_wasm_min_u_i16x8:21199case WebAssembly::BI__builtin_wasm_min_u_i32x4:21200ICmp = Builder.CreateICmpULT(LHS, RHS);21201break;21202case WebAssembly::BI__builtin_wasm_max_s_i8x16:21203case WebAssembly::BI__builtin_wasm_max_s_i16x8:21204case WebAssembly::BI__builtin_wasm_max_s_i32x4:21205ICmp = Builder.CreateICmpSGT(LHS, RHS);21206break;21207case WebAssembly::BI__builtin_wasm_max_u_i8x16:21208case WebAssembly::BI__builtin_wasm_max_u_i16x8:21209case WebAssembly::BI__builtin_wasm_max_u_i32x4:21210ICmp = Builder.CreateICmpUGT(LHS, RHS);21211break;21212default:21213llvm_unreachable("unexpected builtin ID");21214}21215return Builder.CreateSelect(ICmp, LHS, RHS);21216}21217case WebAssembly::BI__builtin_wasm_avgr_u_i8x16:21218case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: {21219Value *LHS = EmitScalarExpr(E->getArg(0));21220Value *RHS = EmitScalarExpr(E->getArg(1));21221Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned,21222ConvertType(E->getType()));21223return Builder.CreateCall(Callee, {LHS, RHS});21224}21225case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: {21226Value 
*LHS = EmitScalarExpr(E->getArg(0));21227Value *RHS = EmitScalarExpr(E->getArg(1));21228Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed);21229return Builder.CreateCall(Callee, {LHS, RHS});21230}21231case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:21232case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:21233case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:21234case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: {21235Value *Vec = EmitScalarExpr(E->getArg(0));21236unsigned IntNo;21237switch (BuiltinID) {21238case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8:21239case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4:21240IntNo = Intrinsic::wasm_extadd_pairwise_signed;21241break;21242case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8:21243case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4:21244IntNo = Intrinsic::wasm_extadd_pairwise_unsigned;21245break;21246default:21247llvm_unreachable("unexpected builtin ID");21248}2124921250Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));21251return Builder.CreateCall(Callee, Vec);21252}21253case WebAssembly::BI__builtin_wasm_bitselect: {21254Value *V1 = EmitScalarExpr(E->getArg(0));21255Value *V2 = EmitScalarExpr(E->getArg(1));21256Value *C = EmitScalarExpr(E->getArg(2));21257Function *Callee =21258CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType()));21259return Builder.CreateCall(Callee, {V1, V2, C});21260}21261case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: {21262Value *LHS = EmitScalarExpr(E->getArg(0));21263Value *RHS = EmitScalarExpr(E->getArg(1));21264Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot);21265return Builder.CreateCall(Callee, {LHS, RHS});21266}21267case WebAssembly::BI__builtin_wasm_popcnt_i8x16: {21268Value *Vec = EmitScalarExpr(E->getArg(0));21269Function *Callee =21270CGM.getIntrinsic(Intrinsic::ctpop, 
ConvertType(E->getType()));21271return Builder.CreateCall(Callee, {Vec});21272}21273case WebAssembly::BI__builtin_wasm_any_true_v128:21274case WebAssembly::BI__builtin_wasm_all_true_i8x16:21275case WebAssembly::BI__builtin_wasm_all_true_i16x8:21276case WebAssembly::BI__builtin_wasm_all_true_i32x4:21277case WebAssembly::BI__builtin_wasm_all_true_i64x2: {21278unsigned IntNo;21279switch (BuiltinID) {21280case WebAssembly::BI__builtin_wasm_any_true_v128:21281IntNo = Intrinsic::wasm_anytrue;21282break;21283case WebAssembly::BI__builtin_wasm_all_true_i8x16:21284case WebAssembly::BI__builtin_wasm_all_true_i16x8:21285case WebAssembly::BI__builtin_wasm_all_true_i32x4:21286case WebAssembly::BI__builtin_wasm_all_true_i64x2:21287IntNo = Intrinsic::wasm_alltrue;21288break;21289default:21290llvm_unreachable("unexpected builtin ID");21291}21292Value *Vec = EmitScalarExpr(E->getArg(0));21293Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());21294return Builder.CreateCall(Callee, {Vec});21295}21296case WebAssembly::BI__builtin_wasm_bitmask_i8x16:21297case WebAssembly::BI__builtin_wasm_bitmask_i16x8:21298case WebAssembly::BI__builtin_wasm_bitmask_i32x4:21299case WebAssembly::BI__builtin_wasm_bitmask_i64x2: {21300Value *Vec = EmitScalarExpr(E->getArg(0));21301Function *Callee =21302CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());21303return Builder.CreateCall(Callee, {Vec});21304}21305case WebAssembly::BI__builtin_wasm_abs_f32x4:21306case WebAssembly::BI__builtin_wasm_abs_f64x2: {21307Value *Vec = EmitScalarExpr(E->getArg(0));21308Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());21309return Builder.CreateCall(Callee, {Vec});21310}21311case WebAssembly::BI__builtin_wasm_sqrt_f32x4:21312case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {21313Value *Vec = EmitScalarExpr(E->getArg(0));21314Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());21315return Builder.CreateCall(Callee, {Vec});21316}21317case 
WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:21318case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:21319case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:21320case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: {21321Value *Low = EmitScalarExpr(E->getArg(0));21322Value *High = EmitScalarExpr(E->getArg(1));21323unsigned IntNo;21324switch (BuiltinID) {21325case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8:21326case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4:21327IntNo = Intrinsic::wasm_narrow_signed;21328break;21329case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8:21330case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:21331IntNo = Intrinsic::wasm_narrow_unsigned;21332break;21333default:21334llvm_unreachable("unexpected builtin ID");21335}21336Function *Callee =21337CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});21338return Builder.CreateCall(Callee, {Low, High});21339}21340case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:21341case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: {21342Value *Vec = EmitScalarExpr(E->getArg(0));21343unsigned IntNo;21344switch (BuiltinID) {21345case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4:21346IntNo = Intrinsic::fptosi_sat;21347break;21348case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4:21349IntNo = Intrinsic::fptoui_sat;21350break;21351default:21352llvm_unreachable("unexpected builtin ID");21353}21354llvm::Type *SrcT = Vec->getType();21355llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty());21356Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT});21357Value *Trunc = Builder.CreateCall(Callee, Vec);21358Value *Splat = Constant::getNullValue(TruncT);21359return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef<int>{0, 1, 2, 3});21360}21361case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {21362Value *Ops[18];21363size_t OpIdx = 0;21364Ops[OpIdx++] = 
EmitScalarExpr(E->getArg(0));21365Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));21366while (OpIdx < 18) {21367std::optional<llvm::APSInt> LaneConst =21368E->getArg(OpIdx)->getIntegerConstantExpr(getContext());21369assert(LaneConst && "Constant arg isn't actually constant?");21370Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst);21371}21372Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle);21373return Builder.CreateCall(Callee, Ops);21374}21375case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:21376case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:21377case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:21378case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:21379case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:21380case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: {21381Value *A = EmitScalarExpr(E->getArg(0));21382Value *B = EmitScalarExpr(E->getArg(1));21383Value *C = EmitScalarExpr(E->getArg(2));21384unsigned IntNo;21385switch (BuiltinID) {21386case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8:21387case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4:21388case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2:21389IntNo = Intrinsic::wasm_relaxed_madd;21390break;21391case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8:21392case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4:21393case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2:21394IntNo = Intrinsic::wasm_relaxed_nmadd;21395break;21396default:21397llvm_unreachable("unexpected builtin ID");21398}21399Function *Callee = CGM.getIntrinsic(IntNo, A->getType());21400return Builder.CreateCall(Callee, {A, B, C});21401}21402case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16:21403case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8:21404case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4:21405case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: {21406Value *A = EmitScalarExpr(E->getArg(0));21407Value *B = 
EmitScalarExpr(E->getArg(1));21408Value *C = EmitScalarExpr(E->getArg(2));21409Function *Callee =21410CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType());21411return Builder.CreateCall(Callee, {A, B, C});21412}21413case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: {21414Value *Src = EmitScalarExpr(E->getArg(0));21415Value *Indices = EmitScalarExpr(E->getArg(1));21416Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle);21417return Builder.CreateCall(Callee, {Src, Indices});21418}21419case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:21420case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:21421case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:21422case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: {21423Value *LHS = EmitScalarExpr(E->getArg(0));21424Value *RHS = EmitScalarExpr(E->getArg(1));21425unsigned IntNo;21426switch (BuiltinID) {21427case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4:21428case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2:21429IntNo = Intrinsic::wasm_relaxed_min;21430break;21431case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4:21432case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2:21433IntNo = Intrinsic::wasm_relaxed_max;21434break;21435default:21436llvm_unreachable("unexpected builtin ID");21437}21438Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType());21439return Builder.CreateCall(Callee, {LHS, RHS});21440}21441case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:21442case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:21443case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:21444case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: {21445Value *Vec = EmitScalarExpr(E->getArg(0));21446unsigned IntNo;21447switch (BuiltinID) {21448case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4:21449IntNo = Intrinsic::wasm_relaxed_trunc_signed;21450break;21451case 
WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4:21452IntNo = Intrinsic::wasm_relaxed_trunc_unsigned;21453break;21454case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2:21455IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero;21456break;21457case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2:21458IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero;21459break;21460default:21461llvm_unreachable("unexpected builtin ID");21462}21463Function *Callee = CGM.getIntrinsic(IntNo);21464return Builder.CreateCall(Callee, {Vec});21465}21466case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: {21467Value *LHS = EmitScalarExpr(E->getArg(0));21468Value *RHS = EmitScalarExpr(E->getArg(1));21469Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed);21470return Builder.CreateCall(Callee, {LHS, RHS});21471}21472case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: {21473Value *LHS = EmitScalarExpr(E->getArg(0));21474Value *RHS = EmitScalarExpr(E->getArg(1));21475Function *Callee =21476CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);21477return Builder.CreateCall(Callee, {LHS, RHS});21478}21479case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: {21480Value *LHS = EmitScalarExpr(E->getArg(0));21481Value *RHS = EmitScalarExpr(E->getArg(1));21482Value *Acc = EmitScalarExpr(E->getArg(2));21483Function *Callee =21484CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);21485return Builder.CreateCall(Callee, {LHS, RHS, Acc});21486}21487case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: {21488Value *LHS = EmitScalarExpr(E->getArg(0));21489Value *RHS = EmitScalarExpr(E->getArg(1));21490Value *Acc = EmitScalarExpr(E->getArg(2));21491Function *Callee =21492CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32);21493return Builder.CreateCall(Callee, {LHS, RHS, Acc});21494}21495case WebAssembly::BI__builtin_wasm_loadf16_f32: {21496Value *Addr 
= EmitScalarExpr(E->getArg(0));21497Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32);21498return Builder.CreateCall(Callee, {Addr});21499}21500case WebAssembly::BI__builtin_wasm_storef16_f32: {21501Value *Val = EmitScalarExpr(E->getArg(0));21502Value *Addr = EmitScalarExpr(E->getArg(1));21503Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32);21504return Builder.CreateCall(Callee, {Val, Addr});21505}21506case WebAssembly::BI__builtin_wasm_splat_f16x8: {21507Value *Val = EmitScalarExpr(E->getArg(0));21508Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8);21509return Builder.CreateCall(Callee, {Val});21510}21511case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: {21512Value *Vector = EmitScalarExpr(E->getArg(0));21513Value *Index = EmitScalarExpr(E->getArg(1));21514Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8);21515return Builder.CreateCall(Callee, {Vector, Index});21516}21517case WebAssembly::BI__builtin_wasm_table_get: {21518assert(E->getArg(0)->getType()->isArrayType());21519Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);21520Value *Index = EmitScalarExpr(E->getArg(1));21521Function *Callee;21522if (E->getType().isWebAssemblyExternrefType())21523Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref);21524else if (E->getType().isWebAssemblyFuncrefType())21525Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref);21526else21527llvm_unreachable(21528"Unexpected reference type for __builtin_wasm_table_get");21529return Builder.CreateCall(Callee, {Table, Index});21530}21531case WebAssembly::BI__builtin_wasm_table_set: {21532assert(E->getArg(0)->getType()->isArrayType());21533Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);21534Value *Index = EmitScalarExpr(E->getArg(1));21535Value *Val = EmitScalarExpr(E->getArg(2));21536Function *Callee;21537if (E->getArg(2)->getType().isWebAssemblyExternrefType())21538Callee = 
CGM.getIntrinsic(Intrinsic::wasm_table_set_externref);21539else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())21540Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref);21541else21542llvm_unreachable(21543"Unexpected reference type for __builtin_wasm_table_set");21544return Builder.CreateCall(Callee, {Table, Index, Val});21545}21546case WebAssembly::BI__builtin_wasm_table_size: {21547assert(E->getArg(0)->getType()->isArrayType());21548Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);21549Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size);21550return Builder.CreateCall(Callee, Value);21551}21552case WebAssembly::BI__builtin_wasm_table_grow: {21553assert(E->getArg(0)->getType()->isArrayType());21554Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);21555Value *Val = EmitScalarExpr(E->getArg(1));21556Value *NElems = EmitScalarExpr(E->getArg(2));2155721558Function *Callee;21559if (E->getArg(1)->getType().isWebAssemblyExternrefType())21560Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref);21561else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())21562Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);21563else21564llvm_unreachable(21565"Unexpected reference type for __builtin_wasm_table_grow");2156621567return Builder.CreateCall(Callee, {Table, Val, NElems});21568}21569case WebAssembly::BI__builtin_wasm_table_fill: {21570assert(E->getArg(0)->getType()->isArrayType());21571Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);21572Value *Index = EmitScalarExpr(E->getArg(1));21573Value *Val = EmitScalarExpr(E->getArg(2));21574Value *NElems = EmitScalarExpr(E->getArg(3));2157521576Function *Callee;21577if (E->getArg(2)->getType().isWebAssemblyExternrefType())21578Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref);21579else if (E->getArg(2)->getType().isWebAssemblyFuncrefType())21580Callee = 
CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref);21581else21582llvm_unreachable(21583"Unexpected reference type for __builtin_wasm_table_fill");2158421585return Builder.CreateCall(Callee, {Table, Index, Val, NElems});21586}21587case WebAssembly::BI__builtin_wasm_table_copy: {21588assert(E->getArg(0)->getType()->isArrayType());21589Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this);21590Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this);21591Value *DstIdx = EmitScalarExpr(E->getArg(2));21592Value *SrcIdx = EmitScalarExpr(E->getArg(3));21593Value *NElems = EmitScalarExpr(E->getArg(4));2159421595Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy);2159621597return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems});21598}21599default:21600return nullptr;21601}21602}2160321604static std::pair<Intrinsic::ID, unsigned>21605getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) {21606struct Info {21607unsigned BuiltinID;21608Intrinsic::ID IntrinsicID;21609unsigned VecLen;21610};21611static Info Infos[] = {21612#define CUSTOM_BUILTIN_MAPPING(x,s) \21613{ Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s },21614CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0)21615CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0)21616CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0)21617CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0)21618CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0)21619CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0)21620CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0)21621CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0)21622CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0)21623CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0)21624CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0)21625CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0)21626CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0)21627CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0)21628CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0)21629CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0)21630CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 
0)21631CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0)21632CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0)21633CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0)21634CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0)21635CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0)21636// Legacy builtins that take a vector in place of a vector predicate.21637CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64)21638CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64)21639CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64)21640CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64)21641CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128)21642CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128)21643CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128)21644CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128)21645#include "clang/Basic/BuiltinsHexagonMapCustomDep.def"21646#undef CUSTOM_BUILTIN_MAPPING21647};2164821649auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; };21650static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true);21651(void)SortOnce;2165221653const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo);21654if (F == std::end(Infos) || F->BuiltinID != BuiltinID)21655return {Intrinsic::not_intrinsic, 0};2165621657return {F->IntrinsicID, F->VecLen};21658}2165921660Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,21661const CallExpr *E) {21662Intrinsic::ID ID;21663unsigned VecLen;21664std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID);2166521666auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) {21667// The base pointer is passed by address, so it needs to be loaded.21668Address A = EmitPointerWithAlignment(E->getArg(0));21669Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment());21670llvm::Value *Base = Builder.CreateLoad(BP);21671// The treatment of both loads and stores is the same: the arguments for21672// the builtin are the same as the arguments for the intrinsic.21673// Load:21674// builtin(Base, Inc, Mod, Start) -> intr(Base, 
Inc, Mod, Start)21675// builtin(Base, Mod, Start) -> intr(Base, Mod, Start)21676// Store:21677// builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start)21678// builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start)21679SmallVector<llvm::Value*,5> Ops = { Base };21680for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i)21681Ops.push_back(EmitScalarExpr(E->getArg(i)));2168221683llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);21684// The load intrinsics generate two results (Value, NewBase), stores21685// generate one (NewBase). The new base address needs to be stored.21686llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1)21687: Result;21688llvm::Value *LV = EmitScalarExpr(E->getArg(0));21689Address Dest = EmitPointerWithAlignment(E->getArg(0));21690llvm::Value *RetVal =21691Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());21692if (IsLoad)21693RetVal = Builder.CreateExtractValue(Result, 0);21694return RetVal;21695};2169621697// Handle the conversion of bit-reverse load intrinsics to bit code.21698// The intrinsic call after this function only reads from memory and the21699// write to memory is dealt by the store instruction.21700auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) {21701// The intrinsic generates one result, which is the new value for the base21702// pointer. It needs to be returned. 
The result of the load instruction is21703// passed to intrinsic by address, so the value needs to be stored.21704llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0));2170521706// Expressions like &(*pt++) will be incremented per evaluation.21707// EmitPointerWithAlignment and EmitScalarExpr evaluates the expression21708// per call.21709Address DestAddr = EmitPointerWithAlignment(E->getArg(1));21710DestAddr = DestAddr.withElementType(Int8Ty);21711llvm::Value *DestAddress = DestAddr.emitRawPointer(*this);2171221713// Operands are Base, Dest, Modifier.21714// The intrinsic format in LLVM IR is defined as21715// { ValueType, i8* } (i8*, i32).21716llvm::Value *Result = Builder.CreateCall(21717CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))});2171821719// The value needs to be stored as the variable is passed by reference.21720llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);2172121722// The store needs to be truncated to fit the destination type.21723// While i32 and i64 are natively supported on Hexagon, i8 and i16 needs21724// to be handled with stores of respective destination type.21725DestVal = Builder.CreateTrunc(DestVal, DestTy);2172621727Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment());21728// The updated value of the base pointer is returned.21729return Builder.CreateExtractValue(Result, 1);21730};2173121732auto V2Q = [this, VecLen] (llvm::Value *Vec) {21733Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B21734: Intrinsic::hexagon_V6_vandvrt;21735return Builder.CreateCall(CGM.getIntrinsic(ID),21736{Vec, Builder.getInt32(-1)});21737};21738auto Q2V = [this, VecLen] (llvm::Value *Pred) {21739Intrinsic::ID ID = VecLen == 128 ? 
Intrinsic::hexagon_V6_vandqrt_128B21740: Intrinsic::hexagon_V6_vandqrt;21741return Builder.CreateCall(CGM.getIntrinsic(ID),21742{Pred, Builder.getInt32(-1)});21743};2174421745switch (BuiltinID) {21746// These intrinsics return a tuple {Vector, VectorPred} in LLVM IR,21747// and the corresponding C/C++ builtins use loads/stores to update21748// the predicate.21749case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:21750case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B:21751case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:21752case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {21753// Get the type from the 0-th argument.21754llvm::Type *VecType = ConvertType(E->getArg(0)->getType());21755Address PredAddr =21756EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);21757llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr));21758llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),21759{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn});2176021761llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);21762Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),21763PredAddr.getAlignment());21764return Builder.CreateExtractValue(Result, 0);21765}21766// These are identical to the builtins above, except they don't consume21767// input carry, only generate carry-out. 
Since they still produce two21768// outputs, generate the store of the predicate, but no load.21769case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo:21770case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B:21771case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo:21772case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: {21773// Get the type from the 0-th argument.21774llvm::Type *VecType = ConvertType(E->getArg(0)->getType());21775Address PredAddr =21776EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType);21777llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID),21778{EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))});2177921780llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1);21781Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this),21782PredAddr.getAlignment());21783return Builder.CreateExtractValue(Result, 0);21784}2178521786case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq:21787case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq:21788case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq:21789case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq:21790case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B:21791case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B:21792case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B:21793case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: {21794SmallVector<llvm::Value*,4> Ops;21795const Expr *PredOp = E->getArg(0);21796// There will be an implicit cast to a boolean vector. 
Strip it.21797if (auto *Cast = dyn_cast<ImplicitCastExpr>(PredOp)) {21798if (Cast->getCastKind() == CK_BitCast)21799PredOp = Cast->getSubExpr();21800Ops.push_back(V2Q(EmitScalarExpr(PredOp)));21801}21802for (int i = 1, e = E->getNumArgs(); i != e; ++i)21803Ops.push_back(EmitScalarExpr(E->getArg(i)));21804return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);21805}2180621807case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:21808case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:21809case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:21810case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:21811case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:21812case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:21813case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:21814case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:21815case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:21816case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:21817case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:21818case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:21819return MakeCircOp(ID, /*IsLoad=*/true);21820case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:21821case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:21822case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:21823case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:21824case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:21825case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:21826case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:21827case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:21828case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:21829case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:21830return MakeCircOp(ID, /*IsLoad=*/false);21831case Hexagon::BI__builtin_brev_ldub:21832return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);21833case Hexagon::BI__builtin_brev_ldb:21834return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);21835case Hexagon::BI__builtin_brev_lduh:21836return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);21837case 
Hexagon::BI__builtin_brev_ldh:21838return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);21839case Hexagon::BI__builtin_brev_ldw:21840return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);21841case Hexagon::BI__builtin_brev_ldd:21842return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);21843} // switch2184421845return nullptr;21846}2184721848Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,21849const CallExpr *E,21850ReturnValueSlot ReturnValue) {21851SmallVector<Value *, 4> Ops;21852llvm::Type *ResultType = ConvertType(E->getType());2185321854// Find out if any arguments are required to be integer constant expressions.21855unsigned ICEArguments = 0;21856ASTContext::GetBuiltinTypeError Error;21857getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);21858if (Error == ASTContext::GE_Missing_type) {21859// Vector intrinsics don't have a type string.21860assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin &&21861BuiltinID <= clang::RISCV::LastRVVBuiltin);21862ICEArguments = 0;21863if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v ||21864BuiltinID == RISCVVector::BI__builtin_rvv_vset_v)21865ICEArguments = 1 << 1;21866} else {21867assert(Error == ASTContext::GE_None && "Unexpected error");21868}2186921870if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load)21871ICEArguments |= (1 << 1);21872if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store)21873ICEArguments |= (1 << 2);2187421875for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {21876// Handle aggregate argument, namely RVV tuple types in segment load/store21877if (hasAggregateEvaluationKind(E->getArg(i)->getType())) {21878LValue L = EmitAggExprToLValue(E->getArg(i));21879llvm::Value *AggValue = Builder.CreateLoad(L.getAddress());21880Ops.push_back(AggValue);21881continue;21882}21883Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E));21884}2188521886Intrinsic::ID ID = Intrinsic::not_intrinsic;21887unsigned NF = 1;21888// The 0th bit simulates the `vta` of 
RVV21889// The 1st bit simulates the `vma` of RVV21890constexpr unsigned RVV_VTA = 0x1;21891constexpr unsigned RVV_VMA = 0x2;21892int PolicyAttrs = 0;21893bool IsMasked = false;2189421895// Required for overloaded intrinsics.21896llvm::SmallVector<llvm::Type *, 2> IntrinsicTypes;21897switch (BuiltinID) {21898default: llvm_unreachable("unexpected builtin ID");21899case RISCV::BI__builtin_riscv_orc_b_32:21900case RISCV::BI__builtin_riscv_orc_b_64:21901case RISCV::BI__builtin_riscv_clz_32:21902case RISCV::BI__builtin_riscv_clz_64:21903case RISCV::BI__builtin_riscv_ctz_32:21904case RISCV::BI__builtin_riscv_ctz_64:21905case RISCV::BI__builtin_riscv_clmul_32:21906case RISCV::BI__builtin_riscv_clmul_64:21907case RISCV::BI__builtin_riscv_clmulh_32:21908case RISCV::BI__builtin_riscv_clmulh_64:21909case RISCV::BI__builtin_riscv_clmulr_32:21910case RISCV::BI__builtin_riscv_clmulr_64:21911case RISCV::BI__builtin_riscv_xperm4_32:21912case RISCV::BI__builtin_riscv_xperm4_64:21913case RISCV::BI__builtin_riscv_xperm8_32:21914case RISCV::BI__builtin_riscv_xperm8_64:21915case RISCV::BI__builtin_riscv_brev8_32:21916case RISCV::BI__builtin_riscv_brev8_64:21917case RISCV::BI__builtin_riscv_zip_32:21918case RISCV::BI__builtin_riscv_unzip_32: {21919switch (BuiltinID) {21920default: llvm_unreachable("unexpected builtin ID");21921// Zbb21922case RISCV::BI__builtin_riscv_orc_b_32:21923case RISCV::BI__builtin_riscv_orc_b_64:21924ID = Intrinsic::riscv_orc_b;21925break;21926case RISCV::BI__builtin_riscv_clz_32:21927case RISCV::BI__builtin_riscv_clz_64: {21928Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());21929Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});21930if (Result->getType() != ResultType)21931Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,21932"cast");21933return Result;21934}21935case RISCV::BI__builtin_riscv_ctz_32:21936case RISCV::BI__builtin_riscv_ctz_64: {21937Function *F = CGM.getIntrinsic(Intrinsic::cttz, 
Ops[0]->getType());21938Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});21939if (Result->getType() != ResultType)21940Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,21941"cast");21942return Result;21943}2194421945// Zbc21946case RISCV::BI__builtin_riscv_clmul_32:21947case RISCV::BI__builtin_riscv_clmul_64:21948ID = Intrinsic::riscv_clmul;21949break;21950case RISCV::BI__builtin_riscv_clmulh_32:21951case RISCV::BI__builtin_riscv_clmulh_64:21952ID = Intrinsic::riscv_clmulh;21953break;21954case RISCV::BI__builtin_riscv_clmulr_32:21955case RISCV::BI__builtin_riscv_clmulr_64:21956ID = Intrinsic::riscv_clmulr;21957break;2195821959// Zbkx21960case RISCV::BI__builtin_riscv_xperm8_32:21961case RISCV::BI__builtin_riscv_xperm8_64:21962ID = Intrinsic::riscv_xperm8;21963break;21964case RISCV::BI__builtin_riscv_xperm4_32:21965case RISCV::BI__builtin_riscv_xperm4_64:21966ID = Intrinsic::riscv_xperm4;21967break;2196821969// Zbkb21970case RISCV::BI__builtin_riscv_brev8_32:21971case RISCV::BI__builtin_riscv_brev8_64:21972ID = Intrinsic::riscv_brev8;21973break;21974case RISCV::BI__builtin_riscv_zip_32:21975ID = Intrinsic::riscv_zip;21976break;21977case RISCV::BI__builtin_riscv_unzip_32:21978ID = Intrinsic::riscv_unzip;21979break;21980}2198121982IntrinsicTypes = {ResultType};21983break;21984}2198521986// Zk builtins2198721988// Zknh21989case RISCV::BI__builtin_riscv_sha256sig0:21990ID = Intrinsic::riscv_sha256sig0;21991break;21992case RISCV::BI__builtin_riscv_sha256sig1:21993ID = Intrinsic::riscv_sha256sig1;21994break;21995case RISCV::BI__builtin_riscv_sha256sum0:21996ID = Intrinsic::riscv_sha256sum0;21997break;21998case RISCV::BI__builtin_riscv_sha256sum1:21999ID = Intrinsic::riscv_sha256sum1;22000break;2200122002// Zksed22003case RISCV::BI__builtin_riscv_sm4ks:22004ID = Intrinsic::riscv_sm4ks;22005break;22006case RISCV::BI__builtin_riscv_sm4ed:22007ID = Intrinsic::riscv_sm4ed;22008break;2200922010// Zksh22011case 
RISCV::BI__builtin_riscv_sm3p0:22012ID = Intrinsic::riscv_sm3p0;22013break;22014case RISCV::BI__builtin_riscv_sm3p1:22015ID = Intrinsic::riscv_sm3p1;22016break;2201722018// Zihintntl22019case RISCV::BI__builtin_riscv_ntl_load: {22020llvm::Type *ResTy = ConvertType(E->getType());22021unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL22022if (Ops.size() == 2)22023DomainVal = cast<ConstantInt>(Ops[1])->getZExtValue();2202422025llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(22026getLLVMContext(),22027llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));22028llvm::MDNode *NontemporalNode = llvm::MDNode::get(22029getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));2203022031int Width;22032if(ResTy->isScalableTy()) {22033const ScalableVectorType *SVTy = cast<ScalableVectorType>(ResTy);22034llvm::Type *ScalarTy = ResTy->getScalarType();22035Width = ScalarTy->getPrimitiveSizeInBits() *22036SVTy->getElementCount().getKnownMinValue();22037} else22038Width = ResTy->getPrimitiveSizeInBits();22039LoadInst *Load = Builder.CreateLoad(22040Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8)));2204122042Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode);22043Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),22044RISCVDomainNode);2204522046return Load;22047}22048case RISCV::BI__builtin_riscv_ntl_store: {22049unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL22050if (Ops.size() == 3)22051DomainVal = cast<ConstantInt>(Ops[2])->getZExtValue();2205222053llvm::MDNode *RISCVDomainNode = llvm::MDNode::get(22054getLLVMContext(),22055llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal)));22056llvm::MDNode *NontemporalNode = llvm::MDNode::get(22057getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));2205822059StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);22060Store->setMetadata(llvm::LLVMContext::MD_nontemporal, 
NontemporalNode);22061Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"),22062RISCVDomainNode);2206322064return Store;22065}2206622067// Vector builtins are handled from here.22068#include "clang/Basic/riscv_vector_builtin_cg.inc"22069// SiFive Vector builtins are handled from here.22070#include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"22071}2207222073assert(ID != Intrinsic::not_intrinsic);2207422075llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);22076return Builder.CreateCall(F, Ops, "");22077}220782207922080