Path: blob/main/contrib/llvm-project/llvm/lib/IR/AutoUpgrade.cpp
35234 views
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the auto-upgrade helper functions.9// This is where deprecated IR intrinsics and other IR features are updated to10// current specifications.11//12//===----------------------------------------------------------------------===//1314#include "llvm/IR/AutoUpgrade.h"15#include "llvm/ADT/StringRef.h"16#include "llvm/ADT/StringSwitch.h"17#include "llvm/BinaryFormat/Dwarf.h"18#include "llvm/IR/AttributeMask.h"19#include "llvm/IR/Constants.h"20#include "llvm/IR/DebugInfo.h"21#include "llvm/IR/DebugInfoMetadata.h"22#include "llvm/IR/DiagnosticInfo.h"23#include "llvm/IR/Function.h"24#include "llvm/IR/IRBuilder.h"25#include "llvm/IR/InstVisitor.h"26#include "llvm/IR/Instruction.h"27#include "llvm/IR/IntrinsicInst.h"28#include "llvm/IR/Intrinsics.h"29#include "llvm/IR/IntrinsicsAArch64.h"30#include "llvm/IR/IntrinsicsARM.h"31#include "llvm/IR/IntrinsicsNVPTX.h"32#include "llvm/IR/IntrinsicsRISCV.h"33#include "llvm/IR/IntrinsicsWebAssembly.h"34#include "llvm/IR/IntrinsicsX86.h"35#include "llvm/IR/LLVMContext.h"36#include "llvm/IR/Metadata.h"37#include "llvm/IR/Module.h"38#include "llvm/IR/Verifier.h"39#include "llvm/Support/CommandLine.h"40#include "llvm/Support/ErrorHandling.h"41#include "llvm/Support/Regex.h"42#include "llvm/TargetParser/Triple.h"43#include <cstring>4445using namespace llvm;4647static cl::opt<bool>48DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",49cl::desc("Disable autoupgrade of debug info"));5051static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }5253// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments 
have54// changed their type from v4f32 to v2i64.55static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,56Function *&NewFn) {57// Check whether this is an old version of the function, which received58// v4f32 arguments.59Type *Arg0Type = F->getFunctionType()->getParamType(0);60if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))61return false;6263// Yes, it's old, replace it with new version.64rename(F);65NewFn = Intrinsic::getDeclaration(F->getParent(), IID);66return true;67}6869// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask70// arguments have changed their type from i32 to i8.71static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,72Function *&NewFn) {73// Check that the last argument is an i32.74Type *LastArgType = F->getFunctionType()->getParamType(75F->getFunctionType()->getNumParams() - 1);76if (!LastArgType->isIntegerTy(32))77return false;7879// Move this function aside and map down.80rename(F);81NewFn = Intrinsic::getDeclaration(F->getParent(), IID);82return true;83}8485// Upgrade the declaration of fp compare intrinsics that change return type86// from scalar to vXi1 mask.87static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,88Function *&NewFn) {89// Check if the return type is a vector.90if (F->getReturnType()->isVectorTy())91return false;9293rename(F);94NewFn = Intrinsic::getDeclaration(F->getParent(), IID);95return true;96}9798static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,99Function *&NewFn) {100if (F->getReturnType()->getScalarType()->isBFloatTy())101return false;102103rename(F);104NewFn = Intrinsic::getDeclaration(F->getParent(), IID);105return true;106}107108static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,109Function *&NewFn) {110if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())111return false;112113rename(F);114NewFn = Intrinsic::getDeclaration(F->getParent(), IID);115return 
true;116}117118static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {119// All of the intrinsics matches below should be marked with which llvm120// version started autoupgrading them. At some point in the future we would121// like to use this information to remove upgrade code for some older122// intrinsics. It is currently undecided how we will determine that future123// point.124if (Name.consume_front("avx."))125return (Name.starts_with("blend.p") || // Added in 3.7126Name == "cvt.ps2.pd.256" || // Added in 3.9127Name == "cvtdq2.pd.256" || // Added in 3.9128Name == "cvtdq2.ps.256" || // Added in 7.0129Name.starts_with("movnt.") || // Added in 3.2130Name.starts_with("sqrt.p") || // Added in 7.0131Name.starts_with("storeu.") || // Added in 3.9132Name.starts_with("vbroadcast.s") || // Added in 3.5133Name.starts_with("vbroadcastf128") || // Added in 4.0134Name.starts_with("vextractf128.") || // Added in 3.7135Name.starts_with("vinsertf128.") || // Added in 3.7136Name.starts_with("vperm2f128.") || // Added in 6.0137Name.starts_with("vpermil.")); // Added in 3.1138139if (Name.consume_front("avx2."))140return (Name == "movntdqa" || // Added in 5.0141Name.starts_with("pabs.") || // Added in 6.0142Name.starts_with("padds.") || // Added in 8.0143Name.starts_with("paddus.") || // Added in 8.0144Name.starts_with("pblendd.") || // Added in 3.7145Name == "pblendw" || // Added in 3.7146Name.starts_with("pbroadcast") || // Added in 3.8147Name.starts_with("pcmpeq.") || // Added in 3.1148Name.starts_with("pcmpgt.") || // Added in 3.1149Name.starts_with("pmax") || // Added in 3.9150Name.starts_with("pmin") || // Added in 3.9151Name.starts_with("pmovsx") || // Added in 3.9152Name.starts_with("pmovzx") || // Added in 3.9153Name == "pmul.dq" || // Added in 7.0154Name == "pmulu.dq" || // Added in 7.0155Name.starts_with("psll.dq") || // Added in 3.7156Name.starts_with("psrl.dq") || // Added in 3.7157Name.starts_with("psubs.") || // Added in 
8.0158Name.starts_with("psubus.") || // Added in 8.0159Name.starts_with("vbroadcast") || // Added in 3.8160Name == "vbroadcasti128" || // Added in 3.7161Name == "vextracti128" || // Added in 3.7162Name == "vinserti128" || // Added in 3.7163Name == "vperm2i128"); // Added in 6.0164165if (Name.consume_front("avx512.")) {166if (Name.consume_front("mask."))167// 'avx512.mask.*'168return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0169Name.starts_with("and.") || // Added in 3.9170Name.starts_with("andn.") || // Added in 3.9171Name.starts_with("broadcast.s") || // Added in 3.9172Name.starts_with("broadcastf32x4.") || // Added in 6.0173Name.starts_with("broadcastf32x8.") || // Added in 6.0174Name.starts_with("broadcastf64x2.") || // Added in 6.0175Name.starts_with("broadcastf64x4.") || // Added in 6.0176Name.starts_with("broadcasti32x4.") || // Added in 6.0177Name.starts_with("broadcasti32x8.") || // Added in 6.0178Name.starts_with("broadcasti64x2.") || // Added in 6.0179Name.starts_with("broadcasti64x4.") || // Added in 6.0180Name.starts_with("cmp.b") || // Added in 5.0181Name.starts_with("cmp.d") || // Added in 5.0182Name.starts_with("cmp.q") || // Added in 5.0183Name.starts_with("cmp.w") || // Added in 5.0184Name.starts_with("compress.b") || // Added in 9.0185Name.starts_with("compress.d") || // Added in 9.0186Name.starts_with("compress.p") || // Added in 9.0187Name.starts_with("compress.q") || // Added in 9.0188Name.starts_with("compress.store.") || // Added in 7.0189Name.starts_with("compress.w") || // Added in 9.0190Name.starts_with("conflict.") || // Added in 9.0191Name.starts_with("cvtdq2pd.") || // Added in 4.0192Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0193Name == "cvtpd2dq.256" || // Added in 7.0194Name == "cvtpd2ps.256" || // Added in 7.0195Name == "cvtps2pd.128" || // Added in 7.0196Name == "cvtps2pd.256" || // Added in 7.0197Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0198Name == "cvtqq2ps.256" || // Added in 
9.0199Name == "cvtqq2ps.512" || // Added in 9.0200Name == "cvttpd2dq.256" || // Added in 7.0201Name == "cvttps2dq.128" || // Added in 7.0202Name == "cvttps2dq.256" || // Added in 7.0203Name.starts_with("cvtudq2pd.") || // Added in 4.0204Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0205Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0206Name == "cvtuqq2ps.256" || // Added in 9.0207Name == "cvtuqq2ps.512" || // Added in 9.0208Name.starts_with("dbpsadbw.") || // Added in 7.0209Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0210Name.starts_with("expand.b") || // Added in 9.0211Name.starts_with("expand.d") || // Added in 9.0212Name.starts_with("expand.load.") || // Added in 7.0213Name.starts_with("expand.p") || // Added in 9.0214Name.starts_with("expand.q") || // Added in 9.0215Name.starts_with("expand.w") || // Added in 9.0216Name.starts_with("fpclass.p") || // Added in 7.0217Name.starts_with("insert") || // Added in 4.0218Name.starts_with("load.") || // Added in 3.9219Name.starts_with("loadu.") || // Added in 3.9220Name.starts_with("lzcnt.") || // Added in 5.0221Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0222Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0223Name.starts_with("movddup") || // Added in 3.9224Name.starts_with("move.s") || // Added in 4.0225Name.starts_with("movshdup") || // Added in 3.9226Name.starts_with("movsldup") || // Added in 3.9227Name.starts_with("mul.p") || // Added in 7.0. 
128/256 in 4.0228Name.starts_with("or.") || // Added in 3.9229Name.starts_with("pabs.") || // Added in 6.0230Name.starts_with("packssdw.") || // Added in 5.0231Name.starts_with("packsswb.") || // Added in 5.0232Name.starts_with("packusdw.") || // Added in 5.0233Name.starts_with("packuswb.") || // Added in 5.0234Name.starts_with("padd.") || // Added in 4.0235Name.starts_with("padds.") || // Added in 8.0236Name.starts_with("paddus.") || // Added in 8.0237Name.starts_with("palignr.") || // Added in 3.9238Name.starts_with("pand.") || // Added in 3.9239Name.starts_with("pandn.") || // Added in 3.9240Name.starts_with("pavg") || // Added in 6.0241Name.starts_with("pbroadcast") || // Added in 6.0242Name.starts_with("pcmpeq.") || // Added in 3.9243Name.starts_with("pcmpgt.") || // Added in 3.9244Name.starts_with("perm.df.") || // Added in 3.9245Name.starts_with("perm.di.") || // Added in 3.9246Name.starts_with("permvar.") || // Added in 7.0247Name.starts_with("pmaddubs.w.") || // Added in 7.0248Name.starts_with("pmaddw.d.") || // Added in 7.0249Name.starts_with("pmax") || // Added in 4.0250Name.starts_with("pmin") || // Added in 4.0251Name == "pmov.qd.256" || // Added in 9.0252Name == "pmov.qd.512" || // Added in 9.0253Name == "pmov.wb.256" || // Added in 9.0254Name == "pmov.wb.512" || // Added in 9.0255Name.starts_with("pmovsx") || // Added in 4.0256Name.starts_with("pmovzx") || // Added in 4.0257Name.starts_with("pmul.dq.") || // Added in 4.0258Name.starts_with("pmul.hr.sw.") || // Added in 7.0259Name.starts_with("pmulh.w.") || // Added in 7.0260Name.starts_with("pmulhu.w.") || // Added in 7.0261Name.starts_with("pmull.") || // Added in 4.0262Name.starts_with("pmultishift.qb.") || // Added in 8.0263Name.starts_with("pmulu.dq.") || // Added in 4.0264Name.starts_with("por.") || // Added in 3.9265Name.starts_with("prol.") || // Added in 8.0266Name.starts_with("prolv.") || // Added in 8.0267Name.starts_with("pror.") || // Added in 8.0268Name.starts_with("prorv.") || // Added 
in 8.0269Name.starts_with("pshuf.b.") || // Added in 4.0270Name.starts_with("pshuf.d.") || // Added in 3.9271Name.starts_with("pshufh.w.") || // Added in 3.9272Name.starts_with("pshufl.w.") || // Added in 3.9273Name.starts_with("psll.d") || // Added in 4.0274Name.starts_with("psll.q") || // Added in 4.0275Name.starts_with("psll.w") || // Added in 4.0276Name.starts_with("pslli") || // Added in 4.0277Name.starts_with("psllv") || // Added in 4.0278Name.starts_with("psra.d") || // Added in 4.0279Name.starts_with("psra.q") || // Added in 4.0280Name.starts_with("psra.w") || // Added in 4.0281Name.starts_with("psrai") || // Added in 4.0282Name.starts_with("psrav") || // Added in 4.0283Name.starts_with("psrl.d") || // Added in 4.0284Name.starts_with("psrl.q") || // Added in 4.0285Name.starts_with("psrl.w") || // Added in 4.0286Name.starts_with("psrli") || // Added in 4.0287Name.starts_with("psrlv") || // Added in 4.0288Name.starts_with("psub.") || // Added in 4.0289Name.starts_with("psubs.") || // Added in 8.0290Name.starts_with("psubus.") || // Added in 8.0291Name.starts_with("pternlog.") || // Added in 7.0292Name.starts_with("punpckh") || // Added in 3.9293Name.starts_with("punpckl") || // Added in 3.9294Name.starts_with("pxor.") || // Added in 3.9295Name.starts_with("shuf.f") || // Added in 6.0296Name.starts_with("shuf.i") || // Added in 6.0297Name.starts_with("shuf.p") || // Added in 4.0298Name.starts_with("sqrt.p") || // Added in 7.0299Name.starts_with("store.b.") || // Added in 3.9300Name.starts_with("store.d.") || // Added in 3.9301Name.starts_with("store.p") || // Added in 3.9302Name.starts_with("store.q.") || // Added in 3.9303Name.starts_with("store.w.") || // Added in 3.9304Name == "store.ss" || // Added in 7.0305Name.starts_with("storeu.") || // Added in 3.9306Name.starts_with("sub.p") || // Added in 7.0. 
128/256 in 4.0307Name.starts_with("ucmp.") || // Added in 5.0308Name.starts_with("unpckh.") || // Added in 3.9309Name.starts_with("unpckl.") || // Added in 3.9310Name.starts_with("valign.") || // Added in 4.0311Name == "vcvtph2ps.128" || // Added in 11.0312Name == "vcvtph2ps.256" || // Added in 11.0313Name.starts_with("vextract") || // Added in 4.0314Name.starts_with("vfmadd.") || // Added in 7.0315Name.starts_with("vfmaddsub.") || // Added in 7.0316Name.starts_with("vfnmadd.") || // Added in 7.0317Name.starts_with("vfnmsub.") || // Added in 7.0318Name.starts_with("vpdpbusd.") || // Added in 7.0319Name.starts_with("vpdpbusds.") || // Added in 7.0320Name.starts_with("vpdpwssd.") || // Added in 7.0321Name.starts_with("vpdpwssds.") || // Added in 7.0322Name.starts_with("vpermi2var.") || // Added in 7.0323Name.starts_with("vpermil.p") || // Added in 3.9324Name.starts_with("vpermilvar.") || // Added in 4.0325Name.starts_with("vpermt2var.") || // Added in 7.0326Name.starts_with("vpmadd52") || // Added in 7.0327Name.starts_with("vpshld.") || // Added in 7.0328Name.starts_with("vpshldv.") || // Added in 8.0329Name.starts_with("vpshrd.") || // Added in 7.0330Name.starts_with("vpshrdv.") || // Added in 8.0331Name.starts_with("vpshufbitqmb.") || // Added in 8.0332Name.starts_with("xor.")); // Added in 3.9333334if (Name.consume_front("mask3."))335// 'avx512.mask3.*'336return (Name.starts_with("vfmadd.") || // Added in 7.0337Name.starts_with("vfmaddsub.") || // Added in 7.0338Name.starts_with("vfmsub.") || // Added in 7.0339Name.starts_with("vfmsubadd.") || // Added in 7.0340Name.starts_with("vfnmsub.")); // Added in 7.0341342if (Name.consume_front("maskz."))343// 'avx512.maskz.*'344return (Name.starts_with("pternlog.") || // Added in 7.0345Name.starts_with("vfmadd.") || // Added in 7.0346Name.starts_with("vfmaddsub.") || // Added in 7.0347Name.starts_with("vpdpbusd.") || // Added in 7.0348Name.starts_with("vpdpbusds.") || // Added in 7.0349Name.starts_with("vpdpwssd.") || // 
Added in 7.0350Name.starts_with("vpdpwssds.") || // Added in 7.0351Name.starts_with("vpermt2var.") || // Added in 7.0352Name.starts_with("vpmadd52") || // Added in 7.0353Name.starts_with("vpshldv.") || // Added in 8.0354Name.starts_with("vpshrdv.")); // Added in 8.0355356// 'avx512.*'357return (Name == "movntdqa" || // Added in 5.0358Name == "pmul.dq.512" || // Added in 7.0359Name == "pmulu.dq.512" || // Added in 7.0360Name.starts_with("broadcastm") || // Added in 6.0361Name.starts_with("cmp.p") || // Added in 12.0362Name.starts_with("cvtb2mask.") || // Added in 7.0363Name.starts_with("cvtd2mask.") || // Added in 7.0364Name.starts_with("cvtmask2") || // Added in 5.0365Name.starts_with("cvtq2mask.") || // Added in 7.0366Name == "cvtusi2sd" || // Added in 7.0367Name.starts_with("cvtw2mask.") || // Added in 7.0368Name == "kand.w" || // Added in 7.0369Name == "kandn.w" || // Added in 7.0370Name == "knot.w" || // Added in 7.0371Name == "kor.w" || // Added in 7.0372Name == "kortestc.w" || // Added in 7.0373Name == "kortestz.w" || // Added in 7.0374Name.starts_with("kunpck") || // added in 6.0375Name == "kxnor.w" || // Added in 7.0376Name == "kxor.w" || // Added in 7.0377Name.starts_with("padds.") || // Added in 8.0378Name.starts_with("pbroadcast") || // Added in 3.9379Name.starts_with("prol") || // Added in 8.0380Name.starts_with("pror") || // Added in 8.0381Name.starts_with("psll.dq") || // Added in 3.9382Name.starts_with("psrl.dq") || // Added in 3.9383Name.starts_with("psubs.") || // Added in 8.0384Name.starts_with("ptestm") || // Added in 6.0385Name.starts_with("ptestnm") || // Added in 6.0386Name.starts_with("storent.") || // Added in 3.9387Name.starts_with("vbroadcast.s") || // Added in 7.0388Name.starts_with("vpshld.") || // Added in 8.0389Name.starts_with("vpshrd.")); // Added in 8.0390}391392if (Name.consume_front("fma."))393return (Name.starts_with("vfmadd.") || // Added in 7.0394Name.starts_with("vfmsub.") || // Added in 7.0395Name.starts_with("vfmsubadd.") || 
// Added in 7.0396Name.starts_with("vfnmadd.") || // Added in 7.0397Name.starts_with("vfnmsub.")); // Added in 7.0398399if (Name.consume_front("fma4."))400return Name.starts_with("vfmadd.s"); // Added in 7.0401402if (Name.consume_front("sse."))403return (Name == "add.ss" || // Added in 4.0404Name == "cvtsi2ss" || // Added in 7.0405Name == "cvtsi642ss" || // Added in 7.0406Name == "div.ss" || // Added in 4.0407Name == "mul.ss" || // Added in 4.0408Name.starts_with("sqrt.p") || // Added in 7.0409Name == "sqrt.ss" || // Added in 7.0410Name.starts_with("storeu.") || // Added in 3.9411Name == "sub.ss"); // Added in 4.0412413if (Name.consume_front("sse2."))414return (Name == "add.sd" || // Added in 4.0415Name == "cvtdq2pd" || // Added in 3.9416Name == "cvtdq2ps" || // Added in 7.0417Name == "cvtps2pd" || // Added in 3.9418Name == "cvtsi2sd" || // Added in 7.0419Name == "cvtsi642sd" || // Added in 7.0420Name == "cvtss2sd" || // Added in 7.0421Name == "div.sd" || // Added in 4.0422Name == "mul.sd" || // Added in 4.0423Name.starts_with("padds.") || // Added in 8.0424Name.starts_with("paddus.") || // Added in 8.0425Name.starts_with("pcmpeq.") || // Added in 3.1426Name.starts_with("pcmpgt.") || // Added in 3.1427Name == "pmaxs.w" || // Added in 3.9428Name == "pmaxu.b" || // Added in 3.9429Name == "pmins.w" || // Added in 3.9430Name == "pminu.b" || // Added in 3.9431Name == "pmulu.dq" || // Added in 7.0432Name.starts_with("pshuf") || // Added in 3.9433Name.starts_with("psll.dq") || // Added in 3.7434Name.starts_with("psrl.dq") || // Added in 3.7435Name.starts_with("psubs.") || // Added in 8.0436Name.starts_with("psubus.") || // Added in 8.0437Name.starts_with("sqrt.p") || // Added in 7.0438Name == "sqrt.sd" || // Added in 7.0439Name == "storel.dq" || // Added in 3.9440Name.starts_with("storeu.") || // Added in 3.9441Name == "sub.sd"); // Added in 4.0442443if (Name.consume_front("sse41."))444return (Name.starts_with("blendp") || // Added in 3.7445Name == "movntdqa" || // Added 
in 5.0446Name == "pblendw" || // Added in 3.7447Name == "pmaxsb" || // Added in 3.9448Name == "pmaxsd" || // Added in 3.9449Name == "pmaxud" || // Added in 3.9450Name == "pmaxuw" || // Added in 3.9451Name == "pminsb" || // Added in 3.9452Name == "pminsd" || // Added in 3.9453Name == "pminud" || // Added in 3.9454Name == "pminuw" || // Added in 3.9455Name.starts_with("pmovsx") || // Added in 3.8456Name.starts_with("pmovzx") || // Added in 3.9457Name == "pmuldq"); // Added in 7.0458459if (Name.consume_front("sse42."))460return Name == "crc32.64.8"; // Added in 3.4461462if (Name.consume_front("sse4a."))463return Name.starts_with("movnt."); // Added in 3.9464465if (Name.consume_front("ssse3."))466return (Name == "pabs.b.128" || // Added in 6.0467Name == "pabs.d.128" || // Added in 6.0468Name == "pabs.w.128"); // Added in 6.0469470if (Name.consume_front("xop."))471return (Name == "vpcmov" || // Added in 3.8472Name == "vpcmov.256" || // Added in 5.0473Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0474Name.starts_with("vprot")); // Added in 8.0475476return (Name == "addcarry.u32" || // Added in 8.0477Name == "addcarry.u64" || // Added in 8.0478Name == "addcarryx.u32" || // Added in 8.0479Name == "addcarryx.u64" || // Added in 8.0480Name == "subborrow.u32" || // Added in 8.0481Name == "subborrow.u64" || // Added in 8.0482Name.starts_with("vcvtph2ps.")); // Added in 11.0483}484485static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,486Function *&NewFn) {487// Only handle intrinsics that start with "x86.".488if (!Name.consume_front("x86."))489return false;490491if (shouldUpgradeX86Intrinsic(F, Name)) {492NewFn = nullptr;493return true;494}495496if (Name == "rdtscp") { // Added in 8.0497// If this intrinsic has 0 operands, it's the new version.498if (F->getFunctionType()->getNumParams() == 0)499return false;500501rename(F);502NewFn = Intrinsic::getDeclaration(F->getParent(),503Intrinsic::x86_rdtscp);504return true;505}506507Intrinsic::ID 
ID;508509// SSE4.1 ptest functions may have an old signature.510if (Name.consume_front("sse41.ptest")) { // Added in 3.2511ID = StringSwitch<Intrinsic::ID>(Name)512.Case("c", Intrinsic::x86_sse41_ptestc)513.Case("z", Intrinsic::x86_sse41_ptestz)514.Case("nzc", Intrinsic::x86_sse41_ptestnzc)515.Default(Intrinsic::not_intrinsic);516if (ID != Intrinsic::not_intrinsic)517return upgradePTESTIntrinsic(F, ID, NewFn);518519return false;520}521522// Several blend and other instructions with masks used the wrong number of523// bits.524525// Added in 3.6526ID = StringSwitch<Intrinsic::ID>(Name)527.Case("sse41.insertps", Intrinsic::x86_sse41_insertps)528.Case("sse41.dppd", Intrinsic::x86_sse41_dppd)529.Case("sse41.dpps", Intrinsic::x86_sse41_dpps)530.Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)531.Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)532.Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)533.Default(Intrinsic::not_intrinsic);534if (ID != Intrinsic::not_intrinsic)535return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);536537if (Name.consume_front("avx512.mask.cmp.")) {538// Added in 7.0539ID = StringSwitch<Intrinsic::ID>(Name)540.Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)541.Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)542.Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)543.Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)544.Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)545.Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)546.Default(Intrinsic::not_intrinsic);547if (ID != Intrinsic::not_intrinsic)548return upgradeX86MaskedFPCompare(F, ID, NewFn);549return false; // No other 'x86.avx523.mask.cmp.*'.550}551552if (Name.consume_front("avx512bf16.")) {553// Added in 9.0554ID = 
StringSwitch<Intrinsic::ID>(Name)555.Case("cvtne2ps2bf16.128",556Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)557.Case("cvtne2ps2bf16.256",558Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)559.Case("cvtne2ps2bf16.512",560Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)561.Case("mask.cvtneps2bf16.128",562Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)563.Case("cvtneps2bf16.256",564Intrinsic::x86_avx512bf16_cvtneps2bf16_256)565.Case("cvtneps2bf16.512",566Intrinsic::x86_avx512bf16_cvtneps2bf16_512)567.Default(Intrinsic::not_intrinsic);568if (ID != Intrinsic::not_intrinsic)569return upgradeX86BF16Intrinsic(F, ID, NewFn);570571// Added in 9.0572ID = StringSwitch<Intrinsic::ID>(Name)573.Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)574.Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)575.Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)576.Default(Intrinsic::not_intrinsic);577if (ID != Intrinsic::not_intrinsic)578return upgradeX86BF16DPIntrinsic(F, ID, NewFn);579return false; // No other 'x86.avx512bf16.*'.580}581582if (Name.consume_front("xop.")) {583Intrinsic::ID ID = Intrinsic::not_intrinsic;584if (Name.starts_with("vpermil2")) { // Added in 3.9585// Upgrade any XOP PERMIL2 index operand still using a float/double586// vector.587auto Idx = F->getFunctionType()->getParamType(2);588if (Idx->isFPOrFPVectorTy()) {589unsigned IdxSize = Idx->getPrimitiveSizeInBits();590unsigned EltSize = Idx->getScalarSizeInBits();591if (EltSize == 64 && IdxSize == 128)592ID = Intrinsic::x86_xop_vpermil2pd;593else if (EltSize == 32 && IdxSize == 128)594ID = Intrinsic::x86_xop_vpermil2ps;595else if (EltSize == 64 && IdxSize == 256)596ID = Intrinsic::x86_xop_vpermil2pd_256;597else598ID = Intrinsic::x86_xop_vpermil2ps_256;599}600} else if (F->arg_size() == 2)601// frcz.ss/sd may need to have an argument dropped. 
Added in 3.2602ID = StringSwitch<Intrinsic::ID>(Name)603.Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)604.Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)605.Default(Intrinsic::not_intrinsic);606607if (ID != Intrinsic::not_intrinsic) {608rename(F);609NewFn = Intrinsic::getDeclaration(F->getParent(), ID);610return true;611}612return false; // No other 'x86.xop.*'613}614615if (Name == "seh.recoverfp") {616NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);617return true;618}619620return false;621}622623// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.624// IsArm: 'arm.*', !IsArm: 'aarch64.*'.625static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,626StringRef Name,627Function *&NewFn) {628if (Name.starts_with("rbit")) {629// '(arm|aarch64).rbit'.630NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,631F->arg_begin()->getType());632return true;633}634635if (Name == "thread.pointer") {636// '(arm|aarch64).thread.pointer'.637NewFn =638Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);639return true;640}641642bool Neon = Name.consume_front("neon.");643if (Neon) {644// '(arm|aarch64).neon.*'.645// Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and646// v16i8 respectively.647if (Name.consume_front("bfdot.")) {648// (arm|aarch64).neon.bfdot.*'.649Intrinsic::ID ID =650StringSwitch<Intrinsic::ID>(Name)651.Cases("v2f32.v8i8", "v4f32.v16i8",652IsArm ? 
(Intrinsic::ID)Intrinsic::arm_neon_bfdot653: (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)654.Default(Intrinsic::not_intrinsic);655if (ID != Intrinsic::not_intrinsic) {656size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();657assert((OperandWidth == 64 || OperandWidth == 128) &&658"Unexpected operand width");659LLVMContext &Ctx = F->getParent()->getContext();660std::array<Type *, 2> Tys{661{F->getReturnType(),662FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};663NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);664return true;665}666return false; // No other '(arm|aarch64).neon.bfdot.*'.667}668669// Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic670// anymore and accept v8bf16 instead of v16i8.671if (Name.consume_front("bfm")) {672// (arm|aarch64).neon.bfm*'.673if (Name.consume_back(".v4f32.v16i8")) {674// (arm|aarch64).neon.bfm*.v4f32.v16i8'.675Intrinsic::ID ID =676StringSwitch<Intrinsic::ID>(Name)677.Case("mla",678IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla679: (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)680.Case("lalb",681IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb682: (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)683.Case("lalt",684IsArm ? 
(Intrinsic::ID)Intrinsic::arm_neon_bfmlalt685: (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)686.Default(Intrinsic::not_intrinsic);687if (ID != Intrinsic::not_intrinsic) {688NewFn = Intrinsic::getDeclaration(F->getParent(), ID);689return true;690}691return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.692}693return false; // No other '(arm|aarch64).neon.bfm*.694}695// Continue on to Aarch64 Neon or Arm Neon.696}697// Continue on to Arm or Aarch64.698699if (IsArm) {700// 'arm.*'.701if (Neon) {702// 'arm.neon.*'.703Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)704.StartsWith("vclz.", Intrinsic::ctlz)705.StartsWith("vcnt.", Intrinsic::ctpop)706.StartsWith("vqadds.", Intrinsic::sadd_sat)707.StartsWith("vqaddu.", Intrinsic::uadd_sat)708.StartsWith("vqsubs.", Intrinsic::ssub_sat)709.StartsWith("vqsubu.", Intrinsic::usub_sat)710.Default(Intrinsic::not_intrinsic);711if (ID != Intrinsic::not_intrinsic) {712NewFn = Intrinsic::getDeclaration(F->getParent(), ID,713F->arg_begin()->getType());714return true;715}716717if (Name.consume_front("vst")) {718// 'arm.neon.vst*'.719static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");720SmallVector<StringRef, 2> Groups;721if (vstRegex.match(Name, &Groups)) {722static const Intrinsic::ID StoreInts[] = {723Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,724Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};725726static const Intrinsic::ID StoreLaneInts[] = {727Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,728Intrinsic::arm_neon_vst4lane};729730auto fArgs = F->getFunctionType()->params();731Type *Tys[] = {fArgs[0], fArgs[1]};732if (Groups[1].size() == 1)733NewFn = Intrinsic::getDeclaration(F->getParent(),734StoreInts[fArgs.size() - 3], Tys);735else736NewFn = Intrinsic::getDeclaration(737F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);738return true;739}740return false; // No other 'arm.neon.vst*'.741}742743return false; // No other 'arm.neon.*'.744}745746if (Name.consume_front("mve.")) {747// 
'arm.mve.*'.748if (Name == "vctp64") {749if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {750// A vctp64 returning a v4i1 is converted to return a v2i1. Rename751// the function and deal with it below in UpgradeIntrinsicCall.752rename(F);753return true;754}755return false; // Not 'arm.mve.vctp64'.756}757758// These too are changed to accept a v2i1 instead of the old v4i1.759if (Name.consume_back(".v4i1")) {760// 'arm.mve.*.v4i1'.761if (Name.consume_back(".predicated.v2i64.v4i32"))762// 'arm.mve.*.predicated.v2i64.v4i32.v4i1'763return Name == "mull.int" || Name == "vqdmull";764765if (Name.consume_back(".v2i64")) {766// 'arm.mve.*.v2i64.v4i1'767bool IsGather = Name.consume_front("vldr.gather.");768if (IsGather || Name.consume_front("vstr.scatter.")) {769if (Name.consume_front("base.")) {770// Optional 'wb.' prefix.771Name.consume_front("wb.");772// 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?773// predicated.v2i64.v2i64.v4i1'.774return Name == "predicated.v2i64";775}776777if (Name.consume_front("offset.predicated."))778return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||779Name == (IsGather ? 
"v2i64.p0" : "p0.v2i64");780781// No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.782return false;783}784785return false; // No other 'arm.mve.*.v2i64.v4i1'.786}787return false; // No other 'arm.mve.*.v4i1'.788}789return false; // No other 'arm.mve.*'.790}791792if (Name.consume_front("cde.vcx")) {793// 'arm.cde.vcx*'.794if (Name.consume_back(".predicated.v2i64.v4i1"))795// 'arm.cde.vcx*.predicated.v2i64.v4i1'.796return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||797Name == "3q" || Name == "3qa";798799return false; // No other 'arm.cde.vcx*'.800}801} else {802// 'aarch64.*'.803if (Neon) {804// 'aarch64.neon.*'.805Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)806.StartsWith("frintn", Intrinsic::roundeven)807.StartsWith("rbit", Intrinsic::bitreverse)808.Default(Intrinsic::not_intrinsic);809if (ID != Intrinsic::not_intrinsic) {810NewFn = Intrinsic::getDeclaration(F->getParent(), ID,811F->arg_begin()->getType());812return true;813}814815if (Name.starts_with("addp")) {816// 'aarch64.neon.addp*'.817if (F->arg_size() != 2)818return false; // Invalid IR.819VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());820if (Ty && Ty->getElementType()->isFloatingPointTy()) {821NewFn = Intrinsic::getDeclaration(F->getParent(),822Intrinsic::aarch64_neon_faddp, Ty);823return true;824}825}826return false; // No other 'aarch64.neon.*'.827}828if (Name.consume_front("sve.")) {829// 'aarch64.sve.*'.830if (Name.consume_front("bf")) {831if (Name.consume_back(".lane")) {832// 'aarch64.sve.bf*.lane'.833Intrinsic::ID ID =834StringSwitch<Intrinsic::ID>(Name)835.Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)836.Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)837.Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)838.Default(Intrinsic::not_intrinsic);839if (ID != Intrinsic::not_intrinsic) {840NewFn = Intrinsic::getDeclaration(F->getParent(), ID);841return true;842}843return false; // No other 'aarch64.sve.bf*.lane'.844}845return false; // No other 
'aarch64.sve.bf*'.846}847848if (Name.consume_front("addqv")) {849// 'aarch64.sve.addqv'.850if (!F->getReturnType()->isFPOrFPVectorTy())851return false;852853auto Args = F->getFunctionType()->params();854Type *Tys[] = {F->getReturnType(), Args[1]};855NewFn = Intrinsic::getDeclaration(F->getParent(),856Intrinsic::aarch64_sve_faddqv, Tys);857return true;858}859860if (Name.consume_front("ld")) {861// 'aarch64.sve.ld*'.862static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");863if (LdRegex.match(Name)) {864Type *ScalarTy =865cast<VectorType>(F->getReturnType())->getElementType();866ElementCount EC =867cast<VectorType>(F->arg_begin()->getType())->getElementCount();868Type *Ty = VectorType::get(ScalarTy, EC);869static const Intrinsic::ID LoadIDs[] = {870Intrinsic::aarch64_sve_ld2_sret,871Intrinsic::aarch64_sve_ld3_sret,872Intrinsic::aarch64_sve_ld4_sret,873};874NewFn = Intrinsic::getDeclaration(F->getParent(),875LoadIDs[Name[0] - '2'], Ty);876return true;877}878return false; // No other 'aarch64.sve.ld*'.879}880881if (Name.consume_front("tuple.")) {882// 'aarch64.sve.tuple.*'.883if (Name.starts_with("get")) {884// 'aarch64.sve.tuple.get*'.885Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};886NewFn = Intrinsic::getDeclaration(F->getParent(),887Intrinsic::vector_extract, Tys);888return true;889}890891if (Name.starts_with("set")) {892// 'aarch64.sve.tuple.set*'.893auto Args = F->getFunctionType()->params();894Type *Tys[] = {Args[0], Args[2], Args[1]};895NewFn = Intrinsic::getDeclaration(F->getParent(),896Intrinsic::vector_insert, Tys);897return true;898}899900static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");901if (CreateTupleRegex.match(Name)) {902// 'aarch64.sve.tuple.create*'.903auto Args = F->getFunctionType()->params();904Type *Tys[] = {F->getReturnType(), Args[1]};905NewFn = Intrinsic::getDeclaration(F->getParent(),906Intrinsic::vector_insert, Tys);907return true;908}909return false; // No other 'aarch64.sve.tuple.*'.910}911return 
false; // No other 'aarch64.sve.*'.912}913}914return false; // No other 'arm.*', 'aarch64.*'.915}916917static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {918if (Name.consume_front("abs."))919return StringSwitch<Intrinsic::ID>(Name)920.Case("bf16", Intrinsic::nvvm_abs_bf16)921.Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)922.Default(Intrinsic::not_intrinsic);923924if (Name.consume_front("fma.rn."))925return StringSwitch<Intrinsic::ID>(Name)926.Case("bf16", Intrinsic::nvvm_fma_rn_bf16)927.Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)928.Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)929.Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)930.Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)931.Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)932.Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)933.Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)934.Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)935.Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)936.Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)937.Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)938.Default(Intrinsic::not_intrinsic);939940if (Name.consume_front("fmax."))941return StringSwitch<Intrinsic::ID>(Name)942.Case("bf16", Intrinsic::nvvm_fmax_bf16)943.Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)944.Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)945.Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)946.Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)947.Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)948.Case("ftz.nan.xorsign.abs.bf16",949Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)950.Case("ftz.nan.xorsign.abs.bf16x2",951Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)952.Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)953.Case("ftz.xorsign.abs.bf16x2",954Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)955.Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)956.Case("nan.bf16x2", 
Intrinsic::nvvm_fmax_nan_bf16x2)957.Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)958.Case("nan.xorsign.abs.bf16x2",959Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)960.Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)961.Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)962.Default(Intrinsic::not_intrinsic);963964if (Name.consume_front("fmin."))965return StringSwitch<Intrinsic::ID>(Name)966.Case("bf16", Intrinsic::nvvm_fmin_bf16)967.Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)968.Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)969.Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)970.Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)971.Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)972.Case("ftz.nan.xorsign.abs.bf16",973Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)974.Case("ftz.nan.xorsign.abs.bf16x2",975Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)976.Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)977.Case("ftz.xorsign.abs.bf16x2",978Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)979.Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)980.Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)981.Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)982.Case("nan.xorsign.abs.bf16x2",983Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)984.Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)985.Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)986.Default(Intrinsic::not_intrinsic);987988if (Name.consume_front("neg."))989return StringSwitch<Intrinsic::ID>(Name)990.Case("bf16", Intrinsic::nvvm_neg_bf16)991.Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)992.Default(Intrinsic::not_intrinsic);993994return Intrinsic::not_intrinsic;995}996997static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,998bool CanUpgradeDebugIntrinsicsToRecords) {999assert(F && "Illegal to upgrade a non-existent Function.");10001001StringRef Name = 
F->getName();10021003// Quickly eliminate it, if it's not a candidate.1004if (!Name.consume_front("llvm.") || Name.empty())1005return false;10061007switch (Name[0]) {1008default: break;1009case 'a': {1010bool IsArm = Name.consume_front("arm.");1011if (IsArm || Name.consume_front("aarch64.")) {1012if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))1013return true;1014break;1015}10161017if (Name.consume_front("amdgcn.")) {1018if (Name == "alignbit") {1019// Target specific intrinsic became redundant1020NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,1021{F->getReturnType()});1022return true;1023}10241025if (Name.consume_front("atomic.")) {1026if (Name.starts_with("inc") || Name.starts_with("dec")) {1027// These were replaced with atomicrmw uinc_wrap and udec_wrap, so1028// there's no new declaration.1029NewFn = nullptr;1030return true;1031}1032break; // No other 'amdgcn.atomic.*'1033}10341035if (Name.starts_with("ds.fadd") || Name.starts_with("ds.fmin") ||1036Name.starts_with("ds.fmax")) {1037// Replaced with atomicrmw fadd/fmin/fmax, so there's no new1038// declaration.1039NewFn = nullptr;1040return true;1041}10421043if (Name.starts_with("ldexp.")) {1044// Target specific intrinsic became redundant1045NewFn = Intrinsic::getDeclaration(1046F->getParent(), Intrinsic::ldexp,1047{F->getReturnType(), F->getArg(1)->getType()});1048return true;1049}1050break; // No other 'amdgcn.*'1051}10521053break;1054}1055case 'c': {1056if (F->arg_size() == 1) {1057Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)1058.StartsWith("ctlz.", Intrinsic::ctlz)1059.StartsWith("cttz.", Intrinsic::cttz)1060.Default(Intrinsic::not_intrinsic);1061if (ID != Intrinsic::not_intrinsic) {1062rename(F);1063NewFn = Intrinsic::getDeclaration(F->getParent(), ID,1064F->arg_begin()->getType());1065return true;1066}1067}10681069if (F->arg_size() == 2 && Name == "coro.end") {1070rename(F);1071NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::coro_end);1072return 
true;1073}10741075break;1076}1077case 'd':1078if (Name.consume_front("dbg.")) {1079// Mark debug intrinsics for upgrade to new debug format.1080if (CanUpgradeDebugIntrinsicsToRecords &&1081F->getParent()->IsNewDbgInfoFormat) {1082if (Name == "addr" || Name == "value" || Name == "assign" ||1083Name == "declare" || Name == "label") {1084// There's no function to replace these with.1085NewFn = nullptr;1086// But we do want these to get upgraded.1087return true;1088}1089}1090// Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get1091// converted to DbgVariableRecords later.1092if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {1093rename(F);1094NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);1095return true;1096}1097break; // No other 'dbg.*'.1098}1099break;1100case 'e':1101if (Name.consume_front("experimental.vector.")) {1102Intrinsic::ID ID =1103StringSwitch<Intrinsic::ID>(Name)1104.StartsWith("extract.", Intrinsic::vector_extract)1105.StartsWith("insert.", Intrinsic::vector_insert)1106.StartsWith("splice.", Intrinsic::vector_splice)1107.StartsWith("reverse.", Intrinsic::vector_reverse)1108.StartsWith("interleave2.", Intrinsic::vector_interleave2)1109.StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)1110.Default(Intrinsic::not_intrinsic);1111if (ID != Intrinsic::not_intrinsic) {1112const auto *FT = F->getFunctionType();1113SmallVector<Type *, 2> Tys;1114if (ID == Intrinsic::vector_extract ||1115ID == Intrinsic::vector_interleave2)1116// Extracting overloads the return type.1117Tys.push_back(FT->getReturnType());1118if (ID != Intrinsic::vector_interleave2)1119Tys.push_back(FT->getParamType(0));1120if (ID == Intrinsic::vector_insert)1121// Inserting overloads the inserted type.1122Tys.push_back(FT->getParamType(1));1123rename(F);1124NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);1125return true;1126}11271128if (Name.consume_front("reduce.")) {1129SmallVector<StringRef, 2> Groups;1130static 
const Regex R("^([a-z]+)\\.[a-z][0-9]+");1131if (R.match(Name, &Groups))1132ID = StringSwitch<Intrinsic::ID>(Groups[1])1133.Case("add", Intrinsic::vector_reduce_add)1134.Case("mul", Intrinsic::vector_reduce_mul)1135.Case("and", Intrinsic::vector_reduce_and)1136.Case("or", Intrinsic::vector_reduce_or)1137.Case("xor", Intrinsic::vector_reduce_xor)1138.Case("smax", Intrinsic::vector_reduce_smax)1139.Case("smin", Intrinsic::vector_reduce_smin)1140.Case("umax", Intrinsic::vector_reduce_umax)1141.Case("umin", Intrinsic::vector_reduce_umin)1142.Case("fmax", Intrinsic::vector_reduce_fmax)1143.Case("fmin", Intrinsic::vector_reduce_fmin)1144.Default(Intrinsic::not_intrinsic);11451146bool V2 = false;1147if (ID == Intrinsic::not_intrinsic) {1148static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");1149Groups.clear();1150V2 = true;1151if (R2.match(Name, &Groups))1152ID = StringSwitch<Intrinsic::ID>(Groups[1])1153.Case("fadd", Intrinsic::vector_reduce_fadd)1154.Case("fmul", Intrinsic::vector_reduce_fmul)1155.Default(Intrinsic::not_intrinsic);1156}1157if (ID != Intrinsic::not_intrinsic) {1158rename(F);1159auto Args = F->getFunctionType()->params();1160NewFn =1161Intrinsic::getDeclaration(F->getParent(), ID, {Args[V2 ? 
1 : 0]});1162return true;1163}1164break; // No other 'expermental.vector.reduce.*'.1165}1166break; // No other 'experimental.vector.*'.1167}1168break; // No other 'e*'.1169case 'f':1170if (Name.starts_with("flt.rounds")) {1171rename(F);1172NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::get_rounding);1173return true;1174}1175break;1176case 'i':1177if (Name.starts_with("invariant.group.barrier")) {1178// Rename invariant.group.barrier to launder.invariant.group1179auto Args = F->getFunctionType()->params();1180Type* ObjectPtr[1] = {Args[0]};1181rename(F);1182NewFn = Intrinsic::getDeclaration(F->getParent(),1183Intrinsic::launder_invariant_group, ObjectPtr);1184return true;1185}1186break;1187case 'm': {1188// Updating the memory intrinsics (memcpy/memmove/memset) that have an1189// alignment parameter to embedding the alignment as an attribute of1190// the pointer args.1191if (unsigned ID = StringSwitch<unsigned>(Name)1192.StartsWith("memcpy.", Intrinsic::memcpy)1193.StartsWith("memmove.", Intrinsic::memmove)1194.Default(0)) {1195if (F->arg_size() == 5) {1196rename(F);1197// Get the types of dest, src, and len1198ArrayRef<Type *> ParamTypes =1199F->getFunctionType()->params().slice(0, 3);1200NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ParamTypes);1201return true;1202}1203}1204if (Name.starts_with("memset.") && F->arg_size() == 5) {1205rename(F);1206// Get the types of dest, and len1207const auto *FT = F->getFunctionType();1208Type *ParamTypes[2] = {1209FT->getParamType(0), // Dest1210FT->getParamType(2) // len1211};1212NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,1213ParamTypes);1214return true;1215}1216break;1217}1218case 'n': {1219if (Name.consume_front("nvvm.")) {1220// Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.1221if (F->arg_size() == 1) {1222Intrinsic::ID IID =1223StringSwitch<Intrinsic::ID>(Name)1224.Cases("brev32", "brev64", Intrinsic::bitreverse)1225.Case("clz.i", 
Intrinsic::ctlz)1226.Case("popc.i", Intrinsic::ctpop)1227.Default(Intrinsic::not_intrinsic);1228if (IID != Intrinsic::not_intrinsic) {1229NewFn = Intrinsic::getDeclaration(F->getParent(), IID,1230{F->getReturnType()});1231return true;1232}1233}12341235// Check for nvvm intrinsics that need a return type adjustment.1236if (!F->getReturnType()->getScalarType()->isBFloatTy()) {1237Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);1238if (IID != Intrinsic::not_intrinsic) {1239NewFn = nullptr;1240return true;1241}1242}12431244// The following nvvm intrinsics correspond exactly to an LLVM idiom, but1245// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.1246//1247// TODO: We could add lohi.i2d.1248bool Expand = false;1249if (Name.consume_front("abs."))1250// nvvm.abs.{i,ii}1251Expand = Name == "i" || Name == "ll";1252else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")1253Expand = true;1254else if (Name.consume_front("max.") || Name.consume_front("min."))1255// nvvm.{min,max}.{i,ii,ui,ull}1256Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||1257Name == "ui" || Name == "ull";1258else if (Name.consume_front("atomic.load.add."))1259// nvvm.atomic.load.add.{f32.p,f64.p}1260Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");1261else1262Expand = false;12631264if (Expand) {1265NewFn = nullptr;1266return true;1267}1268break; // No other 'nvvm.*'.1269}1270break;1271}1272case 'o':1273// We only need to change the name to match the mangling including the1274// address space.1275if (Name.starts_with("objectsize.")) {1276Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };1277if (F->arg_size() == 2 || F->arg_size() == 3 ||1278F->getName() !=1279Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {1280rename(F);1281NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,1282Tys);1283return true;1284}1285}1286break;12871288case 'p':1289if (Name.starts_with("ptr.annotation.") 
&& F->arg_size() == 4) {1290rename(F);1291NewFn = Intrinsic::getDeclaration(1292F->getParent(), Intrinsic::ptr_annotation,1293{F->arg_begin()->getType(), F->getArg(1)->getType()});1294return true;1295}1296break;12971298case 'r': {1299if (Name.consume_front("riscv.")) {1300Intrinsic::ID ID;1301ID = StringSwitch<Intrinsic::ID>(Name)1302.Case("aes32dsi", Intrinsic::riscv_aes32dsi)1303.Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)1304.Case("aes32esi", Intrinsic::riscv_aes32esi)1305.Case("aes32esmi", Intrinsic::riscv_aes32esmi)1306.Default(Intrinsic::not_intrinsic);1307if (ID != Intrinsic::not_intrinsic) {1308if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {1309rename(F);1310NewFn = Intrinsic::getDeclaration(F->getParent(), ID);1311return true;1312}1313break; // No other applicable upgrades.1314}13151316ID = StringSwitch<Intrinsic::ID>(Name)1317.StartsWith("sm4ks", Intrinsic::riscv_sm4ks)1318.StartsWith("sm4ed", Intrinsic::riscv_sm4ed)1319.Default(Intrinsic::not_intrinsic);1320if (ID != Intrinsic::not_intrinsic) {1321if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||1322F->getFunctionType()->getReturnType()->isIntegerTy(64)) {1323rename(F);1324NewFn = Intrinsic::getDeclaration(F->getParent(), ID);1325return true;1326}1327break; // No other applicable upgrades.1328}13291330ID = StringSwitch<Intrinsic::ID>(Name)1331.StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)1332.StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)1333.StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)1334.StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)1335.StartsWith("sm3p0", Intrinsic::riscv_sm3p0)1336.StartsWith("sm3p1", Intrinsic::riscv_sm3p1)1337.Default(Intrinsic::not_intrinsic);1338if (ID != Intrinsic::not_intrinsic) {1339if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {1340rename(F);1341NewFn = Intrinsic::getDeclaration(F->getParent(), ID);1342return true;1343}1344break; // No other applicable upgrades.1345}1346break; // No other 'riscv.*' 
intrinsics1347}1348} break;13491350case 's':1351if (Name == "stackprotectorcheck") {1352NewFn = nullptr;1353return true;1354}1355break;13561357case 'v': {1358if (Name == "var.annotation" && F->arg_size() == 4) {1359rename(F);1360NewFn = Intrinsic::getDeclaration(1361F->getParent(), Intrinsic::var_annotation,1362{{F->arg_begin()->getType(), F->getArg(1)->getType()}});1363return true;1364}1365break;1366}13671368case 'w':1369if (Name.consume_front("wasm.")) {1370Intrinsic::ID ID =1371StringSwitch<Intrinsic::ID>(Name)1372.StartsWith("fma.", Intrinsic::wasm_relaxed_madd)1373.StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)1374.StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)1375.Default(Intrinsic::not_intrinsic);1376if (ID != Intrinsic::not_intrinsic) {1377rename(F);1378NewFn =1379Intrinsic::getDeclaration(F->getParent(), ID, F->getReturnType());1380return true;1381}13821383if (Name.consume_front("dot.i8x16.i7x16.")) {1384ID = StringSwitch<Intrinsic::ID>(Name)1385.Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)1386.Case("add.signed",1387Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)1388.Default(Intrinsic::not_intrinsic);1389if (ID != Intrinsic::not_intrinsic) {1390rename(F);1391NewFn = Intrinsic::getDeclaration(F->getParent(), ID);1392return true;1393}1394break; // No other 'wasm.dot.i8x16.i7x16.*'.1395}1396break; // No other 'wasm.*'.1397}1398break;13991400case 'x':1401if (upgradeX86IntrinsicFunction(F, Name, NewFn))1402return true;1403}14041405auto *ST = dyn_cast<StructType>(F->getReturnType());1406if (ST && (!ST->isLiteral() || ST->isPacked()) &&1407F->getIntrinsicID() != Intrinsic::not_intrinsic) {1408// Replace return type with literal non-packed struct. 
Only do this for1409// intrinsics declared to return a struct, not for intrinsics with1410// overloaded return type, in which case the exact struct type will be1411// mangled into the name.1412SmallVector<Intrinsic::IITDescriptor> Desc;1413Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);1414if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {1415auto *FT = F->getFunctionType();1416auto *NewST = StructType::get(ST->getContext(), ST->elements());1417auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());1418std::string Name = F->getName().str();1419rename(F);1420NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),1421Name, F->getParent());14221423// The new function may also need remangling.1424if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))1425NewFn = *Result;1426return true;1427}1428}14291430// Remangle our intrinsic since we upgrade the mangling1431auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);1432if (Result != std::nullopt) {1433NewFn = *Result;1434return true;1435}14361437// This may not belong here. This function is effectively being overloaded1438// to both detect an intrinsic which needs upgrading, and to provide the1439// upgraded form of the intrinsic. We should perhaps have two separate1440// functions for this.1441return false;1442}14431444bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,1445bool CanUpgradeDebugIntrinsicsToRecords) {1446NewFn = nullptr;1447bool Upgraded =1448upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);1449assert(F != NewFn && "Intrinsic function upgraded to the same function");14501451// Upgrade intrinsic attributes. 
This does not change the function.1452if (NewFn)1453F = NewFn;1454if (Intrinsic::ID id = F->getIntrinsicID())1455F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));1456return Upgraded;1457}14581459GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {1460if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||1461GV->getName() == "llvm.global_dtors")) ||1462!GV->hasInitializer())1463return nullptr;1464ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());1465if (!ATy)1466return nullptr;1467StructType *STy = dyn_cast<StructType>(ATy->getElementType());1468if (!STy || STy->getNumElements() != 2)1469return nullptr;14701471LLVMContext &C = GV->getContext();1472IRBuilder<> IRB(C);1473auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),1474IRB.getPtrTy());1475Constant *Init = GV->getInitializer();1476unsigned N = Init->getNumOperands();1477std::vector<Constant *> NewCtors(N);1478for (unsigned i = 0; i != N; ++i) {1479auto Ctor = cast<Constant>(Init->getOperand(i));1480NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),1481Ctor->getAggregateElement(1),1482Constant::getNullValue(IRB.getPtrTy()));1483}1484Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);14851486return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),1487NewInit, GV->getName());1488}14891490// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them1491// to byte shuffles.1492static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,1493unsigned Shift) {1494auto *ResultTy = cast<FixedVectorType>(Op->getType());1495unsigned NumElts = ResultTy->getNumElements() * 8;14961497// Bitcast from a 64-bit element type to a byte element type.1498Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);1499Op = Builder.CreateBitCast(Op, VecTy, "cast");15001501// We'll be shuffling in zeroes.1502Value *Res = Constant::getNullValue(VecTy);15031504// If shift is less 
than 16, emit a shuffle to move the bytes. Otherwise,1505// we'll just return the zero vector.1506if (Shift < 16) {1507int Idxs[64];1508// 256/512-bit version is split into 2/4 16-byte lanes.1509for (unsigned l = 0; l != NumElts; l += 16)1510for (unsigned i = 0; i != 16; ++i) {1511unsigned Idx = NumElts + i - Shift;1512if (Idx < NumElts)1513Idx -= NumElts - 16; // end of lane, switch operand.1514Idxs[l + i] = Idx + l;1515}15161517Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));1518}15191520// Bitcast back to a 64-bit element type.1521return Builder.CreateBitCast(Res, ResultTy, "cast");1522}15231524// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them1525// to byte shuffles.1526static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,1527unsigned Shift) {1528auto *ResultTy = cast<FixedVectorType>(Op->getType());1529unsigned NumElts = ResultTy->getNumElements() * 8;15301531// Bitcast from a 64-bit element type to a byte element type.1532Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);1533Op = Builder.CreateBitCast(Op, VecTy, "cast");15341535// We'll be shuffling in zeroes.1536Value *Res = Constant::getNullValue(VecTy);15371538// If shift is less than 16, emit a shuffle to move the bytes. 
Otherwise,1539// we'll just return the zero vector.1540if (Shift < 16) {1541int Idxs[64];1542// 256/512-bit version is split into 2/4 16-byte lanes.1543for (unsigned l = 0; l != NumElts; l += 16)1544for (unsigned i = 0; i != 16; ++i) {1545unsigned Idx = i + Shift;1546if (Idx >= 16)1547Idx += NumElts - 16; // end of lane, switch operand.1548Idxs[l + i] = Idx + l;1549}15501551Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));1552}15531554// Bitcast back to a 64-bit element type.1555return Builder.CreateBitCast(Res, ResultTy, "cast");1556}15571558static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,1559unsigned NumElts) {1560assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");1561llvm::VectorType *MaskTy = FixedVectorType::get(1562Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());1563Mask = Builder.CreateBitCast(Mask, MaskTy);15641565// If we have less than 8 elements (1, 2 or 4), then the starting mask was an1566// i8 and we need to extract down to the right number of elements.1567if (NumElts <= 4) {1568int Indices[4];1569for (unsigned i = 0; i != NumElts; ++i)1570Indices[i] = i;1571Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),1572"extract");1573}15741575return Mask;1576}15771578static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,1579Value *Op1) {1580// If the mask is all ones just emit the first operation.1581if (const auto *C = dyn_cast<Constant>(Mask))1582if (C->isAllOnesValue())1583return Op0;15841585Mask = getX86MaskVec(Builder, Mask,1586cast<FixedVectorType>(Op0->getType())->getNumElements());1587return Builder.CreateSelect(Mask, Op0, Op1);1588}15891590static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,1591Value *Op1) {1592// If the mask is all ones just emit the first operation.1593if (const auto *C = dyn_cast<Constant>(Mask))1594if (C->isAllOnesValue())1595return Op0;15961597auto *MaskTy = 
FixedVectorType::get(Builder.getInt1Ty(),1598Mask->getType()->getIntegerBitWidth());1599Mask = Builder.CreateBitCast(Mask, MaskTy);1600Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);1601return Builder.CreateSelect(Mask, Op0, Op1);1602}16031604// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.1605// PALIGNR handles large immediates by shifting while VALIGN masks the immediate1606// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.1607static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,1608Value *Op1, Value *Shift,1609Value *Passthru, Value *Mask,1610bool IsVALIGN) {1611unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();16121613unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();1614assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");1615assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");1616assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");16171618// Mask the immediate for VALIGN.1619if (IsVALIGN)1620ShiftVal &= (NumElts - 1);16211622// If palignr is shifting the pair of vectors more than the size of two1623// lanes, emit zero.1624if (ShiftVal >= 32)1625return llvm::Constant::getNullValue(Op0->getType());16261627// If palignr is shifting the pair of input vectors more than one lane,1628// but less than two lanes, convert to shifting in zeroes.1629if (ShiftVal > 16) {1630ShiftVal -= 16;1631Op1 = Op0;1632Op0 = llvm::Constant::getNullValue(Op0->getType());1633}16341635int Indices[64];1636// 256-bit palignr operates on 128-bit lanes so we need to handle that1637for (unsigned l = 0; l < NumElts; l += 16) {1638for (unsigned i = 0; i != 16; ++i) {1639unsigned Idx = ShiftVal + i;1640if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.1641Idx += NumElts - 16; // End of lane, switch operand.1642Indices[l + i] = Idx + l;1643}1644}16451646Value *Align = Builder.CreateShuffleVector(1647Op1, Op0, ArrayRef(Indices, 
NumElts), "palignr");16481649return emitX86Select(Builder, Mask, Align, Passthru);1650}16511652static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,1653bool ZeroMask, bool IndexForm) {1654Type *Ty = CI.getType();1655unsigned VecWidth = Ty->getPrimitiveSizeInBits();1656unsigned EltWidth = Ty->getScalarSizeInBits();1657bool IsFloat = Ty->isFPOrFPVectorTy();1658Intrinsic::ID IID;1659if (VecWidth == 128 && EltWidth == 32 && IsFloat)1660IID = Intrinsic::x86_avx512_vpermi2var_ps_128;1661else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)1662IID = Intrinsic::x86_avx512_vpermi2var_d_128;1663else if (VecWidth == 128 && EltWidth == 64 && IsFloat)1664IID = Intrinsic::x86_avx512_vpermi2var_pd_128;1665else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)1666IID = Intrinsic::x86_avx512_vpermi2var_q_128;1667else if (VecWidth == 256 && EltWidth == 32 && IsFloat)1668IID = Intrinsic::x86_avx512_vpermi2var_ps_256;1669else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)1670IID = Intrinsic::x86_avx512_vpermi2var_d_256;1671else if (VecWidth == 256 && EltWidth == 64 && IsFloat)1672IID = Intrinsic::x86_avx512_vpermi2var_pd_256;1673else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)1674IID = Intrinsic::x86_avx512_vpermi2var_q_256;1675else if (VecWidth == 512 && EltWidth == 32 && IsFloat)1676IID = Intrinsic::x86_avx512_vpermi2var_ps_512;1677else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)1678IID = Intrinsic::x86_avx512_vpermi2var_d_512;1679else if (VecWidth == 512 && EltWidth == 64 && IsFloat)1680IID = Intrinsic::x86_avx512_vpermi2var_pd_512;1681else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)1682IID = Intrinsic::x86_avx512_vpermi2var_q_512;1683else if (VecWidth == 128 && EltWidth == 16)1684IID = Intrinsic::x86_avx512_vpermi2var_hi_128;1685else if (VecWidth == 256 && EltWidth == 16)1686IID = Intrinsic::x86_avx512_vpermi2var_hi_256;1687else if (VecWidth == 512 && EltWidth == 16)1688IID = Intrinsic::x86_avx512_vpermi2var_hi_512;1689else if 
(VecWidth == 128 && EltWidth == 8)1690IID = Intrinsic::x86_avx512_vpermi2var_qi_128;1691else if (VecWidth == 256 && EltWidth == 8)1692IID = Intrinsic::x86_avx512_vpermi2var_qi_256;1693else if (VecWidth == 512 && EltWidth == 8)1694IID = Intrinsic::x86_avx512_vpermi2var_qi_512;1695else1696llvm_unreachable("Unexpected intrinsic");16971698Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),1699CI.getArgOperand(2) };17001701// If this isn't index form we need to swap operand 0 and 1.1702if (!IndexForm)1703std::swap(Args[0], Args[1]);17041705Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),1706Args);1707Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)1708: Builder.CreateBitCast(CI.getArgOperand(1),1709Ty);1710return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);1711}17121713static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,1714Intrinsic::ID IID) {1715Type *Ty = CI.getType();1716Value *Op0 = CI.getOperand(0);1717Value *Op1 = CI.getOperand(1);1718Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);1719Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});17201721if (CI.arg_size() == 4) { // For masked intrinsics.1722Value *VecSrc = CI.getOperand(2);1723Value *Mask = CI.getOperand(3);1724Res = emitX86Select(Builder, Mask, Res, VecSrc);1725}1726return Res;1727}17281729static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,1730bool IsRotateRight) {1731Type *Ty = CI.getType();1732Value *Src = CI.getArgOperand(0);1733Value *Amt = CI.getArgOperand(1);17341735// Amount may be scalar immediate, in which case create a splat vector.1736// Funnel shifts amounts are treated as modulo and types are all power-of-2 so1737// we only care about the lowest log2 bits anyway.1738if (Amt->getType() != Ty) {1739unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();1740Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);1741Amt = Builder.CreateVectorSplat(NumElts, 
Amt);1742}17431744Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;1745Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);1746Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});17471748if (CI.arg_size() == 4) { // For masked intrinsics.1749Value *VecSrc = CI.getOperand(2);1750Value *Mask = CI.getOperand(3);1751Res = emitX86Select(Builder, Mask, Res, VecSrc);1752}1753return Res;1754}17551756static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,1757bool IsSigned) {1758Type *Ty = CI.getType();1759Value *LHS = CI.getArgOperand(0);1760Value *RHS = CI.getArgOperand(1);17611762CmpInst::Predicate Pred;1763switch (Imm) {1764case 0x0:1765Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;1766break;1767case 0x1:1768Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;1769break;1770case 0x2:1771Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;1772break;1773case 0x3:1774Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;1775break;1776case 0x4:1777Pred = ICmpInst::ICMP_EQ;1778break;1779case 0x5:1780Pred = ICmpInst::ICMP_NE;1781break;1782case 0x6:1783return Constant::getNullValue(Ty); // FALSE1784case 0x7:1785return Constant::getAllOnesValue(Ty); // TRUE1786default:1787llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");1788}17891790Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);1791Value *Ext = Builder.CreateSExt(Cmp, Ty);1792return Ext;1793}17941795static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,1796bool IsShiftRight, bool ZeroMask) {1797Type *Ty = CI.getType();1798Value *Op0 = CI.getArgOperand(0);1799Value *Op1 = CI.getArgOperand(1);1800Value *Amt = CI.getArgOperand(2);18011802if (IsShiftRight)1803std::swap(Op0, Op1);18041805// Amount may be scalar immediate, in which case create a splat vector.1806// Funnel shifts amounts are treated as modulo and types are all power-of-2 so1807// we only care about the lowest log2 bits anyway.1808if (Amt->getType() != 
Ty) {1809unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();1810Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);1811Amt = Builder.CreateVectorSplat(NumElts, Amt);1812}18131814Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;1815Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);1816Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});18171818unsigned NumArgs = CI.arg_size();1819if (NumArgs >= 4) { // For masked intrinsics.1820Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :1821ZeroMask ? ConstantAggregateZero::get(CI.getType()) :1822CI.getArgOperand(0);1823Value *Mask = CI.getOperand(NumArgs - 1);1824Res = emitX86Select(Builder, Mask, Res, VecSrc);1825}1826return Res;1827}18281829static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,1830Value *Mask, bool Aligned) {1831// Cast the pointer to the right type.1832Ptr = Builder.CreateBitCast(Ptr,1833llvm::PointerType::getUnqual(Data->getType()));1834const Align Alignment =1835Aligned1836? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)1837: Align(1);18381839// If the mask is all ones just emit a regular store.1840if (const auto *C = dyn_cast<Constant>(Mask))1841if (C->isAllOnesValue())1842return Builder.CreateAlignedStore(Data, Ptr, Alignment);18431844// Convert the mask from an integer type to a vector of i1.1845unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();1846Mask = getX86MaskVec(Builder, Mask, NumElts);1847return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);1848}18491850static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,1851Value *Passthru, Value *Mask, bool Aligned) {1852Type *ValTy = Passthru->getType();1853// Cast the pointer to the right type.1854Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));1855const Align Alignment =1856Aligned1857? 
Align(1858Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /18598)1860: Align(1);18611862// If the mask is all ones just emit a regular store.1863if (const auto *C = dyn_cast<Constant>(Mask))1864if (C->isAllOnesValue())1865return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);18661867// Convert the mask from an integer type to a vector of i1.1868unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();1869Mask = getX86MaskVec(Builder, Mask, NumElts);1870return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);1871}18721873static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {1874Type *Ty = CI.getType();1875Value *Op0 = CI.getArgOperand(0);1876Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);1877Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});1878if (CI.arg_size() == 3)1879Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));1880return Res;1881}18821883static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {1884Type *Ty = CI.getType();18851886// Arguments have a vXi32 type so cast to vXi64.1887Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);1888Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);18891890if (IsSigned) {1891// Shift left then arithmetic shift right.1892Constant *ShiftAmt = ConstantInt::get(Ty, 32);1893LHS = Builder.CreateShl(LHS, ShiftAmt);1894LHS = Builder.CreateAShr(LHS, ShiftAmt);1895RHS = Builder.CreateShl(RHS, ShiftAmt);1896RHS = Builder.CreateAShr(RHS, ShiftAmt);1897} else {1898// Clear the upper bits.1899Constant *Mask = ConstantInt::get(Ty, 0xffffffff);1900LHS = Builder.CreateAnd(LHS, Mask);1901RHS = Builder.CreateAnd(RHS, Mask);1902}19031904Value *Res = Builder.CreateMul(LHS, RHS);19051906if (CI.arg_size() == 4)1907Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));19081909return Res;1910}19111912// Applying mask on vector of i1's and make sure result is at least 
8 bits wide.1913static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,1914Value *Mask) {1915unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();1916if (Mask) {1917const auto *C = dyn_cast<Constant>(Mask);1918if (!C || !C->isAllOnesValue())1919Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));1920}19211922if (NumElts < 8) {1923int Indices[8];1924for (unsigned i = 0; i != NumElts; ++i)1925Indices[i] = i;1926for (unsigned i = NumElts; i != 8; ++i)1927Indices[i] = NumElts + i % NumElts;1928Vec = Builder.CreateShuffleVector(Vec,1929Constant::getNullValue(Vec->getType()),1930Indices);1931}1932return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));1933}19341935static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,1936unsigned CC, bool Signed) {1937Value *Op0 = CI.getArgOperand(0);1938unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();19391940Value *Cmp;1941if (CC == 3) {1942Cmp = Constant::getNullValue(1943FixedVectorType::get(Builder.getInt1Ty(), NumElts));1944} else if (CC == 7) {1945Cmp = Constant::getAllOnesValue(1946FixedVectorType::get(Builder.getInt1Ty(), NumElts));1947} else {1948ICmpInst::Predicate Pred;1949switch (CC) {1950default: llvm_unreachable("Unknown condition code");1951case 0: Pred = ICmpInst::ICMP_EQ; break;1952case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;1953case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;1954case 4: Pred = ICmpInst::ICMP_NE; break;1955case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;1956case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;1957}1958Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));1959}19601961Value *Mask = CI.getArgOperand(CI.arg_size() - 1);19621963return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);1964}19651966// Replace a masked intrinsic with an older unmasked intrinsic.1967static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,1968Intrinsic::ID IID) {1969Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);1970Value *Rep = Builder.CreateCall(Intrin,1971{ CI.getArgOperand(0), CI.getArgOperand(1) });1972return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));1973}19741975static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {1976Value* A = CI.getArgOperand(0);1977Value* B = CI.getArgOperand(1);1978Value* Src = CI.getArgOperand(2);1979Value* Mask = CI.getArgOperand(3);19801981Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));1982Value* Cmp = Builder.CreateIsNotNull(AndNode);1983Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);1984Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);1985Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);1986return Builder.CreateInsertElement(A, Select, (uint64_t)0);1987}19881989static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {1990Value* Op = CI.getArgOperand(0);1991Type* ReturnOp = CI.getType();1992unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();1993Value *Mask = getX86MaskVec(Builder, Op, NumElts);1994return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");1995}19961997// Replace intrinsic with unmasked version and a select.1998static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,1999CallBase &CI, Value *&Rep) {2000Name = Name.substr(12); // Remove avx512.mask.20012002unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();2003unsigned EltWidth = CI.getType()->getScalarSizeInBits();2004Intrinsic::ID IID;2005if 
(Name.starts_with("max.p")) {2006if (VecWidth == 128 && EltWidth == 32)2007IID = Intrinsic::x86_sse_max_ps;2008else if (VecWidth == 128 && EltWidth == 64)2009IID = Intrinsic::x86_sse2_max_pd;2010else if (VecWidth == 256 && EltWidth == 32)2011IID = Intrinsic::x86_avx_max_ps_256;2012else if (VecWidth == 256 && EltWidth == 64)2013IID = Intrinsic::x86_avx_max_pd_256;2014else2015llvm_unreachable("Unexpected intrinsic");2016} else if (Name.starts_with("min.p")) {2017if (VecWidth == 128 && EltWidth == 32)2018IID = Intrinsic::x86_sse_min_ps;2019else if (VecWidth == 128 && EltWidth == 64)2020IID = Intrinsic::x86_sse2_min_pd;2021else if (VecWidth == 256 && EltWidth == 32)2022IID = Intrinsic::x86_avx_min_ps_256;2023else if (VecWidth == 256 && EltWidth == 64)2024IID = Intrinsic::x86_avx_min_pd_256;2025else2026llvm_unreachable("Unexpected intrinsic");2027} else if (Name.starts_with("pshuf.b.")) {2028if (VecWidth == 128)2029IID = Intrinsic::x86_ssse3_pshuf_b_128;2030else if (VecWidth == 256)2031IID = Intrinsic::x86_avx2_pshuf_b;2032else if (VecWidth == 512)2033IID = Intrinsic::x86_avx512_pshuf_b_512;2034else2035llvm_unreachable("Unexpected intrinsic");2036} else if (Name.starts_with("pmul.hr.sw.")) {2037if (VecWidth == 128)2038IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;2039else if (VecWidth == 256)2040IID = Intrinsic::x86_avx2_pmul_hr_sw;2041else if (VecWidth == 512)2042IID = Intrinsic::x86_avx512_pmul_hr_sw_512;2043else2044llvm_unreachable("Unexpected intrinsic");2045} else if (Name.starts_with("pmulh.w.")) {2046if (VecWidth == 128)2047IID = Intrinsic::x86_sse2_pmulh_w;2048else if (VecWidth == 256)2049IID = Intrinsic::x86_avx2_pmulh_w;2050else if (VecWidth == 512)2051IID = Intrinsic::x86_avx512_pmulh_w_512;2052else2053llvm_unreachable("Unexpected intrinsic");2054} else if (Name.starts_with("pmulhu.w.")) {2055if (VecWidth == 128)2056IID = Intrinsic::x86_sse2_pmulhu_w;2057else if (VecWidth == 256)2058IID = Intrinsic::x86_avx2_pmulhu_w;2059else if (VecWidth == 512)2060IID = 
Intrinsic::x86_avx512_pmulhu_w_512;2061else2062llvm_unreachable("Unexpected intrinsic");2063} else if (Name.starts_with("pmaddw.d.")) {2064if (VecWidth == 128)2065IID = Intrinsic::x86_sse2_pmadd_wd;2066else if (VecWidth == 256)2067IID = Intrinsic::x86_avx2_pmadd_wd;2068else if (VecWidth == 512)2069IID = Intrinsic::x86_avx512_pmaddw_d_512;2070else2071llvm_unreachable("Unexpected intrinsic");2072} else if (Name.starts_with("pmaddubs.w.")) {2073if (VecWidth == 128)2074IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;2075else if (VecWidth == 256)2076IID = Intrinsic::x86_avx2_pmadd_ub_sw;2077else if (VecWidth == 512)2078IID = Intrinsic::x86_avx512_pmaddubs_w_512;2079else2080llvm_unreachable("Unexpected intrinsic");2081} else if (Name.starts_with("packsswb.")) {2082if (VecWidth == 128)2083IID = Intrinsic::x86_sse2_packsswb_128;2084else if (VecWidth == 256)2085IID = Intrinsic::x86_avx2_packsswb;2086else if (VecWidth == 512)2087IID = Intrinsic::x86_avx512_packsswb_512;2088else2089llvm_unreachable("Unexpected intrinsic");2090} else if (Name.starts_with("packssdw.")) {2091if (VecWidth == 128)2092IID = Intrinsic::x86_sse2_packssdw_128;2093else if (VecWidth == 256)2094IID = Intrinsic::x86_avx2_packssdw;2095else if (VecWidth == 512)2096IID = Intrinsic::x86_avx512_packssdw_512;2097else2098llvm_unreachable("Unexpected intrinsic");2099} else if (Name.starts_with("packuswb.")) {2100if (VecWidth == 128)2101IID = Intrinsic::x86_sse2_packuswb_128;2102else if (VecWidth == 256)2103IID = Intrinsic::x86_avx2_packuswb;2104else if (VecWidth == 512)2105IID = Intrinsic::x86_avx512_packuswb_512;2106else2107llvm_unreachable("Unexpected intrinsic");2108} else if (Name.starts_with("packusdw.")) {2109if (VecWidth == 128)2110IID = Intrinsic::x86_sse41_packusdw;2111else if (VecWidth == 256)2112IID = Intrinsic::x86_avx2_packusdw;2113else if (VecWidth == 512)2114IID = Intrinsic::x86_avx512_packusdw_512;2115else2116llvm_unreachable("Unexpected intrinsic");2117} else if (Name.starts_with("vpermilvar.")) 
{2118if (VecWidth == 128 && EltWidth == 32)2119IID = Intrinsic::x86_avx_vpermilvar_ps;2120else if (VecWidth == 128 && EltWidth == 64)2121IID = Intrinsic::x86_avx_vpermilvar_pd;2122else if (VecWidth == 256 && EltWidth == 32)2123IID = Intrinsic::x86_avx_vpermilvar_ps_256;2124else if (VecWidth == 256 && EltWidth == 64)2125IID = Intrinsic::x86_avx_vpermilvar_pd_256;2126else if (VecWidth == 512 && EltWidth == 32)2127IID = Intrinsic::x86_avx512_vpermilvar_ps_512;2128else if (VecWidth == 512 && EltWidth == 64)2129IID = Intrinsic::x86_avx512_vpermilvar_pd_512;2130else2131llvm_unreachable("Unexpected intrinsic");2132} else if (Name == "cvtpd2dq.256") {2133IID = Intrinsic::x86_avx_cvt_pd2dq_256;2134} else if (Name == "cvtpd2ps.256") {2135IID = Intrinsic::x86_avx_cvt_pd2_ps_256;2136} else if (Name == "cvttpd2dq.256") {2137IID = Intrinsic::x86_avx_cvtt_pd2dq_256;2138} else if (Name == "cvttps2dq.128") {2139IID = Intrinsic::x86_sse2_cvttps2dq;2140} else if (Name == "cvttps2dq.256") {2141IID = Intrinsic::x86_avx_cvtt_ps2dq_256;2142} else if (Name.starts_with("permvar.")) {2143bool IsFloat = CI.getType()->isFPOrFPVectorTy();2144if (VecWidth == 256 && EltWidth == 32 && IsFloat)2145IID = Intrinsic::x86_avx2_permps;2146else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)2147IID = Intrinsic::x86_avx2_permd;2148else if (VecWidth == 256 && EltWidth == 64 && IsFloat)2149IID = Intrinsic::x86_avx512_permvar_df_256;2150else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)2151IID = Intrinsic::x86_avx512_permvar_di_256;2152else if (VecWidth == 512 && EltWidth == 32 && IsFloat)2153IID = Intrinsic::x86_avx512_permvar_sf_512;2154else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)2155IID = Intrinsic::x86_avx512_permvar_si_512;2156else if (VecWidth == 512 && EltWidth == 64 && IsFloat)2157IID = Intrinsic::x86_avx512_permvar_df_512;2158else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)2159IID = Intrinsic::x86_avx512_permvar_di_512;2160else if (VecWidth == 128 && EltWidth == 16)2161IID 
= Intrinsic::x86_avx512_permvar_hi_128;2162else if (VecWidth == 256 && EltWidth == 16)2163IID = Intrinsic::x86_avx512_permvar_hi_256;2164else if (VecWidth == 512 && EltWidth == 16)2165IID = Intrinsic::x86_avx512_permvar_hi_512;2166else if (VecWidth == 128 && EltWidth == 8)2167IID = Intrinsic::x86_avx512_permvar_qi_128;2168else if (VecWidth == 256 && EltWidth == 8)2169IID = Intrinsic::x86_avx512_permvar_qi_256;2170else if (VecWidth == 512 && EltWidth == 8)2171IID = Intrinsic::x86_avx512_permvar_qi_512;2172else2173llvm_unreachable("Unexpected intrinsic");2174} else if (Name.starts_with("dbpsadbw.")) {2175if (VecWidth == 128)2176IID = Intrinsic::x86_avx512_dbpsadbw_128;2177else if (VecWidth == 256)2178IID = Intrinsic::x86_avx512_dbpsadbw_256;2179else if (VecWidth == 512)2180IID = Intrinsic::x86_avx512_dbpsadbw_512;2181else2182llvm_unreachable("Unexpected intrinsic");2183} else if (Name.starts_with("pmultishift.qb.")) {2184if (VecWidth == 128)2185IID = Intrinsic::x86_avx512_pmultishift_qb_128;2186else if (VecWidth == 256)2187IID = Intrinsic::x86_avx512_pmultishift_qb_256;2188else if (VecWidth == 512)2189IID = Intrinsic::x86_avx512_pmultishift_qb_512;2190else2191llvm_unreachable("Unexpected intrinsic");2192} else if (Name.starts_with("conflict.")) {2193if (Name[9] == 'd' && VecWidth == 128)2194IID = Intrinsic::x86_avx512_conflict_d_128;2195else if (Name[9] == 'd' && VecWidth == 256)2196IID = Intrinsic::x86_avx512_conflict_d_256;2197else if (Name[9] == 'd' && VecWidth == 512)2198IID = Intrinsic::x86_avx512_conflict_d_512;2199else if (Name[9] == 'q' && VecWidth == 128)2200IID = Intrinsic::x86_avx512_conflict_q_128;2201else if (Name[9] == 'q' && VecWidth == 256)2202IID = Intrinsic::x86_avx512_conflict_q_256;2203else if (Name[9] == 'q' && VecWidth == 512)2204IID = Intrinsic::x86_avx512_conflict_q_512;2205else2206llvm_unreachable("Unexpected intrinsic");2207} else if (Name.starts_with("pavg.")) {2208if (Name[5] == 'b' && VecWidth == 128)2209IID = 
Intrinsic::x86_sse2_pavg_b;2210else if (Name[5] == 'b' && VecWidth == 256)2211IID = Intrinsic::x86_avx2_pavg_b;2212else if (Name[5] == 'b' && VecWidth == 512)2213IID = Intrinsic::x86_avx512_pavg_b_512;2214else if (Name[5] == 'w' && VecWidth == 128)2215IID = Intrinsic::x86_sse2_pavg_w;2216else if (Name[5] == 'w' && VecWidth == 256)2217IID = Intrinsic::x86_avx2_pavg_w;2218else if (Name[5] == 'w' && VecWidth == 512)2219IID = Intrinsic::x86_avx512_pavg_w_512;2220else2221llvm_unreachable("Unexpected intrinsic");2222} else2223return false;22242225SmallVector<Value *, 4> Args(CI.args());2226Args.pop_back();2227Args.pop_back();2228Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),2229Args);2230unsigned NumArgs = CI.arg_size();2231Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,2232CI.getArgOperand(NumArgs - 2));2233return true;2234}22352236/// Upgrade comment in call to inline asm that represents an objc retain release2237/// marker.2238void llvm::UpgradeInlineAsmString(std::string *AsmStr) {2239size_t Pos;2240if (AsmStr->find("mov\tfp") == 0 &&2241AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&2242(Pos = AsmStr->find("# marker")) != std::string::npos) {2243AsmStr->replace(Pos, 1, ";");2244}2245}22462247static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,2248IRBuilder<> &Builder) {2249LLVMContext &C = F->getContext();2250Value *Rep = nullptr;22512252if (Name.starts_with("sse4a.movnt.")) {2253SmallVector<Metadata *, 1> Elts;2254Elts.push_back(2255ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));2256MDNode *Node = MDNode::get(C, Elts);22572258Value *Arg0 = CI->getArgOperand(0);2259Value *Arg1 = CI->getArgOperand(1);22602261// Nontemporal (unaligned) store of the 0'th element of the float/double2262// vector.2263Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();2264PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);2265Value *Addr = 
Builder.CreateBitCast(Arg0, EltPtrTy, "cast");2266Value *Extract =2267Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");22682269StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));2270SI->setMetadata(LLVMContext::MD_nontemporal, Node);2271} else if (Name.starts_with("avx.movnt.") ||2272Name.starts_with("avx512.storent.")) {2273SmallVector<Metadata *, 1> Elts;2274Elts.push_back(2275ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));2276MDNode *Node = MDNode::get(C, Elts);22772278Value *Arg0 = CI->getArgOperand(0);2279Value *Arg1 = CI->getArgOperand(1);22802281// Convert the type of the pointer to a pointer to the stored type.2282Value *BC = Builder.CreateBitCast(2283Arg0, PointerType::getUnqual(Arg1->getType()), "cast");2284StoreInst *SI = Builder.CreateAlignedStore(2285Arg1, BC,2286Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));2287SI->setMetadata(LLVMContext::MD_nontemporal, Node);2288} else if (Name == "sse2.storel.dq") {2289Value *Arg0 = CI->getArgOperand(0);2290Value *Arg1 = CI->getArgOperand(1);22912292auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);2293Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");2294Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);2295Value *BC = Builder.CreateBitCast(2296Arg0, PointerType::getUnqual(Elt->getType()), "cast");2297Builder.CreateAlignedStore(Elt, BC, Align(1));2298} else if (Name.starts_with("sse.storeu.") ||2299Name.starts_with("sse2.storeu.") ||2300Name.starts_with("avx.storeu.")) {2301Value *Arg0 = CI->getArgOperand(0);2302Value *Arg1 = CI->getArgOperand(1);23032304Arg0 = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()),2305"cast");2306Builder.CreateAlignedStore(Arg1, Arg0, Align(1));2307} else if (Name == "avx512.mask.store.ss") {2308Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));2309upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),2310Mask, 
false);2311} else if (Name.starts_with("avx512.mask.store")) {2312// "avx512.mask.storeu." or "avx512.mask.store."2313bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".2314upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),2315CI->getArgOperand(2), Aligned);2316} else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {2317// Upgrade packed integer vector compare intrinsics to compare instructions.2318// "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."2319bool CmpEq = Name[9] == 'e';2320Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,2321CI->getArgOperand(0), CI->getArgOperand(1));2322Rep = Builder.CreateSExt(Rep, CI->getType(), "");2323} else if (Name.starts_with("avx512.broadcastm")) {2324Type *ExtTy = Type::getInt32Ty(C);2325if (CI->getOperand(0)->getType()->isIntegerTy(8))2326ExtTy = Type::getInt64Ty(C);2327unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /2328ExtTy->getPrimitiveSizeInBits();2329Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);2330Rep = Builder.CreateVectorSplat(NumElts, Rep);2331} else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {2332Value *Vec = CI->getArgOperand(0);2333Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);2334Function *Intr = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sqrt,2335Elt0->getType());2336Elt0 = Builder.CreateCall(Intr, Elt0);2337Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);2338} else if (Name.starts_with("avx.sqrt.p") ||2339Name.starts_with("sse2.sqrt.p") ||2340Name.starts_with("sse.sqrt.p")) {2341Rep =2342Builder.CreateCall(Intrinsic::getDeclaration(2343F->getParent(), Intrinsic::sqrt, CI->getType()),2344{CI->getArgOperand(0)});2345} else if (Name.starts_with("avx512.mask.sqrt.p")) {2346if (CI->arg_size() == 4 &&2347(!isa<ConstantInt>(CI->getArgOperand(3)) ||2348cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {2349Intrinsic::ID IID = Name[18] == 's' ? 
Intrinsic::x86_avx512_sqrt_ps_5122350: Intrinsic::x86_avx512_sqrt_pd_512;23512352Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};2353Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),2354Args);2355} else {2356Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),2357Intrinsic::sqrt,2358CI->getType()),2359{CI->getArgOperand(0)});2360}2361Rep =2362emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));2363} else if (Name.starts_with("avx512.ptestm") ||2364Name.starts_with("avx512.ptestnm")) {2365Value *Op0 = CI->getArgOperand(0);2366Value *Op1 = CI->getArgOperand(1);2367Value *Mask = CI->getArgOperand(2);2368Rep = Builder.CreateAnd(Op0, Op1);2369llvm::Type *Ty = Op0->getType();2370Value *Zero = llvm::Constant::getNullValue(Ty);2371ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")2372? ICmpInst::ICMP_NE2373: ICmpInst::ICMP_EQ;2374Rep = Builder.CreateICmp(Pred, Rep, Zero);2375Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);2376} else if (Name.starts_with("avx512.mask.pbroadcast")) {2377unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())2378->getNumElements();2379Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));2380Rep =2381emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));2382} else if (Name.starts_with("avx512.kunpck")) {2383unsigned NumElts = CI->getType()->getScalarSizeInBits();2384Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);2385Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);2386int Indices[64];2387for (unsigned i = 0; i != NumElts; ++i)2388Indices[i] = i;23892390// First extract half of each vector. 
This gives better codegen than2391// doing it in a single shuffle.2392LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));2393RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));2394// Concat the vectors.2395// NOTE: Operands have to be swapped to match intrinsic definition.2396Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));2397Rep = Builder.CreateBitCast(Rep, CI->getType());2398} else if (Name == "avx512.kand.w") {2399Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2400Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);2401Rep = Builder.CreateAnd(LHS, RHS);2402Rep = Builder.CreateBitCast(Rep, CI->getType());2403} else if (Name == "avx512.kandn.w") {2404Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2405Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);2406LHS = Builder.CreateNot(LHS);2407Rep = Builder.CreateAnd(LHS, RHS);2408Rep = Builder.CreateBitCast(Rep, CI->getType());2409} else if (Name == "avx512.kor.w") {2410Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2411Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);2412Rep = Builder.CreateOr(LHS, RHS);2413Rep = Builder.CreateBitCast(Rep, CI->getType());2414} else if (Name == "avx512.kxor.w") {2415Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2416Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);2417Rep = Builder.CreateXor(LHS, RHS);2418Rep = Builder.CreateBitCast(Rep, CI->getType());2419} else if (Name == "avx512.kxnor.w") {2420Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2421Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);2422LHS = Builder.CreateNot(LHS);2423Rep = Builder.CreateXor(LHS, RHS);2424Rep = Builder.CreateBitCast(Rep, CI->getType());2425} else if (Name == "avx512.knot.w") {2426Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2427Rep = Builder.CreateNot(Rep);2428Rep = 
Builder.CreateBitCast(Rep, CI->getType());2429} else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {2430Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);2431Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);2432Rep = Builder.CreateOr(LHS, RHS);2433Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());2434Value *C;2435if (Name[14] == 'c')2436C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());2437else2438C = ConstantInt::getNullValue(Builder.getInt16Ty());2439Rep = Builder.CreateICmpEQ(Rep, C);2440Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());2441} else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||2442Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||2443Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||2444Name == "sse.div.ss" || Name == "sse2.div.sd") {2445Type *I32Ty = Type::getInt32Ty(C);2446Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),2447ConstantInt::get(I32Ty, 0));2448Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),2449ConstantInt::get(I32Ty, 0));2450Value *EltOp;2451if (Name.contains(".add."))2452EltOp = Builder.CreateFAdd(Elt0, Elt1);2453else if (Name.contains(".sub."))2454EltOp = Builder.CreateFSub(Elt0, Elt1);2455else if (Name.contains(".mul."))2456EltOp = Builder.CreateFMul(Elt0, Elt1);2457else2458EltOp = Builder.CreateFDiv(Elt0, Elt1);2459Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,2460ConstantInt::get(I32Ty, 0));2461} else if (Name.starts_with("avx512.mask.pcmp")) {2462// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."2463bool CmpEq = Name[16] == 'e';2464Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 
0 : 6, true);2465} else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {2466Type *OpTy = CI->getArgOperand(0)->getType();2467unsigned VecWidth = OpTy->getPrimitiveSizeInBits();2468Intrinsic::ID IID;2469switch (VecWidth) {2470default:2471llvm_unreachable("Unexpected intrinsic");2472case 128:2473IID = Intrinsic::x86_avx512_vpshufbitqmb_128;2474break;2475case 256:2476IID = Intrinsic::x86_avx512_vpshufbitqmb_256;2477break;2478case 512:2479IID = Intrinsic::x86_avx512_vpshufbitqmb_512;2480break;2481}24822483Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),2484{CI->getOperand(0), CI->getArgOperand(1)});2485Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));2486} else if (Name.starts_with("avx512.mask.fpclass.p")) {2487Type *OpTy = CI->getArgOperand(0)->getType();2488unsigned VecWidth = OpTy->getPrimitiveSizeInBits();2489unsigned EltWidth = OpTy->getScalarSizeInBits();2490Intrinsic::ID IID;2491if (VecWidth == 128 && EltWidth == 32)2492IID = Intrinsic::x86_avx512_fpclass_ps_128;2493else if (VecWidth == 256 && EltWidth == 32)2494IID = Intrinsic::x86_avx512_fpclass_ps_256;2495else if (VecWidth == 512 && EltWidth == 32)2496IID = Intrinsic::x86_avx512_fpclass_ps_512;2497else if (VecWidth == 128 && EltWidth == 64)2498IID = Intrinsic::x86_avx512_fpclass_pd_128;2499else if (VecWidth == 256 && EltWidth == 64)2500IID = Intrinsic::x86_avx512_fpclass_pd_256;2501else if (VecWidth == 512 && EltWidth == 64)2502IID = Intrinsic::x86_avx512_fpclass_pd_512;2503else2504llvm_unreachable("Unexpected intrinsic");25052506Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),2507{CI->getOperand(0), CI->getArgOperand(1)});2508Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));2509} else if (Name.starts_with("avx512.cmp.p")) {2510SmallVector<Value *, 4> Args(CI->args());2511Type *OpTy = Args[0]->getType();2512unsigned VecWidth = OpTy->getPrimitiveSizeInBits();2513unsigned EltWidth = OpTy->getScalarSizeInBits();2514Intrinsic::ID 
IID;2515if (VecWidth == 128 && EltWidth == 32)2516IID = Intrinsic::x86_avx512_mask_cmp_ps_128;2517else if (VecWidth == 256 && EltWidth == 32)2518IID = Intrinsic::x86_avx512_mask_cmp_ps_256;2519else if (VecWidth == 512 && EltWidth == 32)2520IID = Intrinsic::x86_avx512_mask_cmp_ps_512;2521else if (VecWidth == 128 && EltWidth == 64)2522IID = Intrinsic::x86_avx512_mask_cmp_pd_128;2523else if (VecWidth == 256 && EltWidth == 64)2524IID = Intrinsic::x86_avx512_mask_cmp_pd_256;2525else if (VecWidth == 512 && EltWidth == 64)2526IID = Intrinsic::x86_avx512_mask_cmp_pd_512;2527else2528llvm_unreachable("Unexpected intrinsic");25292530Value *Mask = Constant::getAllOnesValue(CI->getType());2531if (VecWidth == 512)2532std::swap(Mask, Args.back());2533Args.push_back(Mask);25342535Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),2536Args);2537} else if (Name.starts_with("avx512.mask.cmp.")) {2538// Integer compare intrinsics.2539unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();2540Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);2541} else if (Name.starts_with("avx512.mask.ucmp.")) {2542unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();2543Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);2544} else if (Name.starts_with("avx512.cvtb2mask.") ||2545Name.starts_with("avx512.cvtw2mask.") ||2546Name.starts_with("avx512.cvtd2mask.") ||2547Name.starts_with("avx512.cvtq2mask.")) {2548Value *Op = CI->getArgOperand(0);2549Value *Zero = llvm::Constant::getNullValue(Op->getType());2550Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);2551Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);2552} else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||2553Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||2554Name.starts_with("avx512.mask.pabs")) {2555Rep = upgradeAbs(Builder, *CI);2556} else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||2557Name == "sse41.pmaxsd" || 
Name.starts_with("avx2.pmaxs") ||2558Name.starts_with("avx512.mask.pmaxs")) {2559Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);2560} else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||2561Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||2562Name.starts_with("avx512.mask.pmaxu")) {2563Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);2564} else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||2565Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||2566Name.starts_with("avx512.mask.pmins")) {2567Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);2568} else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||2569Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||2570Name.starts_with("avx512.mask.pminu")) {2571Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);2572} else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||2573Name == "avx512.pmulu.dq.512" ||2574Name.starts_with("avx512.mask.pmulu.dq.")) {2575Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);2576} else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||2577Name == "avx512.pmul.dq.512" ||2578Name.starts_with("avx512.mask.pmul.dq.")) {2579Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);2580} else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||2581Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {2582Rep =2583Builder.CreateSIToFP(CI->getArgOperand(1),2584cast<VectorType>(CI->getType())->getElementType());2585Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);2586} else if (Name == "avx512.cvtusi2sd") {2587Rep =2588Builder.CreateUIToFP(CI->getArgOperand(1),2589cast<VectorType>(CI->getType())->getElementType());2590Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);2591} else if (Name == "sse2.cvtss2sd") {2592Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);2593Rep = Builder.CreateFPExt(2594Rep, 
cast<VectorType>(CI->getType())->getElementType());2595Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);2596} else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||2597Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||2598Name.starts_with("avx512.mask.cvtdq2pd.") ||2599Name.starts_with("avx512.mask.cvtudq2pd.") ||2600Name.starts_with("avx512.mask.cvtdq2ps.") ||2601Name.starts_with("avx512.mask.cvtudq2ps.") ||2602Name.starts_with("avx512.mask.cvtqq2pd.") ||2603Name.starts_with("avx512.mask.cvtuqq2pd.") ||2604Name == "avx512.mask.cvtqq2ps.256" ||2605Name == "avx512.mask.cvtqq2ps.512" ||2606Name == "avx512.mask.cvtuqq2ps.256" ||2607Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||2608Name == "avx.cvt.ps2.pd.256" ||2609Name == "avx512.mask.cvtps2pd.128" ||2610Name == "avx512.mask.cvtps2pd.256") {2611auto *DstTy = cast<FixedVectorType>(CI->getType());2612Rep = CI->getArgOperand(0);2613auto *SrcTy = cast<FixedVectorType>(Rep->getType());26142615unsigned NumDstElts = DstTy->getNumElements();2616if (NumDstElts < SrcTy->getNumElements()) {2617assert(NumDstElts == 2 && "Unexpected vector size");2618Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});2619}26202621bool IsPS2PD = SrcTy->getElementType()->isFloatTy();2622bool IsUnsigned = Name.contains("cvtu");2623if (IsPS2PD)2624Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");2625else if (CI->arg_size() == 4 &&2626(!isa<ConstantInt>(CI->getArgOperand(3)) ||2627cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {2628Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round2629: Intrinsic::x86_avx512_sitofp_round;2630Function *F =2631Intrinsic::getDeclaration(CI->getModule(), IID, {DstTy, SrcTy});2632Rep = Builder.CreateCall(F, {Rep, CI->getArgOperand(3)});2633} else {2634Rep = IsUnsigned ? 
Builder.CreateUIToFP(Rep, DstTy, "cvt")2635: Builder.CreateSIToFP(Rep, DstTy, "cvt");2636}26372638if (CI->arg_size() >= 3)2639Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,2640CI->getArgOperand(1));2641} else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||2642Name.starts_with("vcvtph2ps.")) {2643auto *DstTy = cast<FixedVectorType>(CI->getType());2644Rep = CI->getArgOperand(0);2645auto *SrcTy = cast<FixedVectorType>(Rep->getType());2646unsigned NumDstElts = DstTy->getNumElements();2647if (NumDstElts != SrcTy->getNumElements()) {2648assert(NumDstElts == 4 && "Unexpected vector size");2649Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});2650}2651Rep = Builder.CreateBitCast(2652Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));2653Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");2654if (CI->arg_size() >= 3)2655Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,2656CI->getArgOperand(1));2657} else if (Name.starts_with("avx512.mask.load")) {2658// "avx512.mask.loadu." 
or "avx512.mask.load."2659bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".2660Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),2661CI->getArgOperand(2), Aligned);2662} else if (Name.starts_with("avx512.mask.expand.load.")) {2663auto *ResultTy = cast<FixedVectorType>(CI->getType());2664Type *PtrTy = ResultTy->getElementType();26652666// Cast the pointer to element type.2667Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),2668llvm::PointerType::getUnqual(PtrTy));26692670Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),2671ResultTy->getNumElements());26722673Function *ELd = Intrinsic::getDeclaration(2674F->getParent(), Intrinsic::masked_expandload, ResultTy);2675Rep = Builder.CreateCall(ELd, {Ptr, MaskVec, CI->getOperand(1)});2676} else if (Name.starts_with("avx512.mask.compress.store.")) {2677auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());2678Type *PtrTy = ResultTy->getElementType();26792680// Cast the pointer to element type.2681Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),2682llvm::PointerType::getUnqual(PtrTy));26832684Value *MaskVec =2685getX86MaskVec(Builder, CI->getArgOperand(2),2686cast<FixedVectorType>(ResultTy)->getNumElements());26872688Function *CSt = Intrinsic::getDeclaration(2689F->getParent(), Intrinsic::masked_compressstore, ResultTy);2690Rep = Builder.CreateCall(CSt, {CI->getArgOperand(1), Ptr, MaskVec});2691} else if (Name.starts_with("avx512.mask.compress.") ||2692Name.starts_with("avx512.mask.expand.")) {2693auto *ResultTy = cast<FixedVectorType>(CI->getType());26942695Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),2696ResultTy->getNumElements());26972698bool IsCompress = Name[12] == 'c';2699Intrinsic::ID IID = IsCompress ? 
Intrinsic::x86_avx512_mask_compress2700: Intrinsic::x86_avx512_mask_expand;2701Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);2702Rep = Builder.CreateCall(Intr,2703{CI->getOperand(0), CI->getOperand(1), MaskVec});2704} else if (Name.starts_with("xop.vpcom")) {2705bool IsSigned;2706if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||2707Name.ends_with("uq"))2708IsSigned = false;2709else if (Name.ends_with("b") || Name.ends_with("w") ||2710Name.ends_with("d") || Name.ends_with("q"))2711IsSigned = true;2712else2713llvm_unreachable("Unknown suffix");27142715unsigned Imm;2716if (CI->arg_size() == 3) {2717Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();2718} else {2719Name = Name.substr(9); // strip off "xop.vpcom"2720if (Name.starts_with("lt"))2721Imm = 0;2722else if (Name.starts_with("le"))2723Imm = 1;2724else if (Name.starts_with("gt"))2725Imm = 2;2726else if (Name.starts_with("ge"))2727Imm = 3;2728else if (Name.starts_with("eq"))2729Imm = 4;2730else if (Name.starts_with("ne"))2731Imm = 5;2732else if (Name.starts_with("false"))2733Imm = 6;2734else if (Name.starts_with("true"))2735Imm = 7;2736else2737llvm_unreachable("Unknown condition");2738}27392740Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);2741} else if (Name.starts_with("xop.vpcmov")) {2742Value *Sel = CI->getArgOperand(2);2743Value *NotSel = Builder.CreateNot(Sel);2744Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);2745Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);2746Rep = Builder.CreateOr(Sel0, Sel1);2747} else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||2748Name.starts_with("avx512.mask.prol")) {2749Rep = upgradeX86Rotate(Builder, *CI, false);2750} else if (Name.starts_with("avx512.pror") ||2751Name.starts_with("avx512.mask.pror")) {2752Rep = upgradeX86Rotate(Builder, *CI, true);2753} else if (Name.starts_with("avx512.vpshld.") ||2754Name.starts_with("avx512.mask.vpshld") 
||2755Name.starts_with("avx512.maskz.vpshld")) {2756bool ZeroMask = Name[11] == 'z';2757Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);2758} else if (Name.starts_with("avx512.vpshrd.") ||2759Name.starts_with("avx512.mask.vpshrd") ||2760Name.starts_with("avx512.maskz.vpshrd")) {2761bool ZeroMask = Name[11] == 'z';2762Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);2763} else if (Name == "sse42.crc32.64.8") {2764Function *CRC32 = Intrinsic::getDeclaration(2765F->getParent(), Intrinsic::x86_sse42_crc32_32_8);2766Value *Trunc0 =2767Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));2768Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});2769Rep = Builder.CreateZExt(Rep, CI->getType(), "");2770} else if (Name.starts_with("avx.vbroadcast.s") ||2771Name.starts_with("avx512.vbroadcast.s")) {2772// Replace broadcasts with a series of insertelements.2773auto *VecTy = cast<FixedVectorType>(CI->getType());2774Type *EltTy = VecTy->getElementType();2775unsigned EltNum = VecTy->getNumElements();2776Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));2777Type *I32Ty = Type::getInt32Ty(C);2778Rep = PoisonValue::get(VecTy);2779for (unsigned I = 0; I < EltNum; ++I)2780Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));2781} else if (Name.starts_with("sse41.pmovsx") ||2782Name.starts_with("sse41.pmovzx") ||2783Name.starts_with("avx2.pmovsx") ||2784Name.starts_with("avx2.pmovzx") ||2785Name.starts_with("avx512.mask.pmovsx") ||2786Name.starts_with("avx512.mask.pmovzx")) {2787auto *DstTy = cast<FixedVectorType>(CI->getType());2788unsigned NumDstElts = DstTy->getNumElements();27892790// Extract a subvector of the first NumDstElts lanes and sign/zero extend.2791SmallVector<int, 8> ShuffleMask(NumDstElts);2792for (unsigned i = 0; i != NumDstElts; ++i)2793ShuffleMask[i] = i;27942795Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);27962797bool DoSext = Name.contains("pmovsx");2798Rep 
=2799DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);2800// If there are 3 arguments, it's a masked intrinsic so we need a select.2801if (CI->arg_size() == 3)2802Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,2803CI->getArgOperand(1));2804} else if (Name == "avx512.mask.pmov.qd.256" ||2805Name == "avx512.mask.pmov.qd.512" ||2806Name == "avx512.mask.pmov.wb.256" ||2807Name == "avx512.mask.pmov.wb.512") {2808Type *Ty = CI->getArgOperand(1)->getType();2809Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);2810Rep =2811emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));2812} else if (Name.starts_with("avx.vbroadcastf128") ||2813Name == "avx2.vbroadcasti128") {2814// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.2815Type *EltTy = cast<VectorType>(CI->getType())->getElementType();2816unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();2817auto *VT = FixedVectorType::get(EltTy, NumSrcElts);2818Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),2819PointerType::getUnqual(VT));2820Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));2821if (NumSrcElts == 2)2822Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});2823else2824Rep = Builder.CreateShuffleVector(Load,2825ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});2826} else if (Name.starts_with("avx512.mask.shuf.i") ||2827Name.starts_with("avx512.mask.shuf.f")) {2828unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();2829Type *VT = CI->getType();2830unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;2831unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();2832unsigned ControlBitsMask = NumLanes - 1;2833unsigned NumControlBits = NumLanes / 2;2834SmallVector<int, 8> ShuffleMask(0);28352836for (unsigned l = 0; l != NumLanes; ++l) {2837unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;2838// We actually need the other source.2839if (l >= NumLanes / 2)2840LaneMask += NumLanes;2841for 
(unsigned i = 0; i != NumElementsInLane; ++i)2842ShuffleMask.push_back(LaneMask * NumElementsInLane + i);2843}2844Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),2845CI->getArgOperand(1), ShuffleMask);2846Rep =2847emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));2848} else if (Name.starts_with("avx512.mask.broadcastf") ||2849Name.starts_with("avx512.mask.broadcasti")) {2850unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())2851->getNumElements();2852unsigned NumDstElts =2853cast<FixedVectorType>(CI->getType())->getNumElements();28542855SmallVector<int, 8> ShuffleMask(NumDstElts);2856for (unsigned i = 0; i != NumDstElts; ++i)2857ShuffleMask[i] = i % NumSrcElts;28582859Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),2860CI->getArgOperand(0), ShuffleMask);2861Rep =2862emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));2863} else if (Name.starts_with("avx2.pbroadcast") ||2864Name.starts_with("avx2.vbroadcast") ||2865Name.starts_with("avx512.pbroadcast") ||2866Name.starts_with("avx512.mask.broadcast.s")) {2867// Replace vp?broadcasts with a vector shuffle.2868Value *Op = CI->getArgOperand(0);2869ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();2870Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);2871SmallVector<int, 8> M;2872ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);2873Rep = Builder.CreateShuffleVector(Op, M);28742875if (CI->arg_size() == 3)2876Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,2877CI->getArgOperand(1));2878} else if (Name.starts_with("sse2.padds.") ||2879Name.starts_with("avx2.padds.") ||2880Name.starts_with("avx512.padds.") ||2881Name.starts_with("avx512.mask.padds.")) {2882Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);2883} else if (Name.starts_with("sse2.psubs.") ||2884Name.starts_with("avx2.psubs.") ||2885Name.starts_with("avx512.psubs.") ||2886Name.starts_with("avx512.mask.psubs.")) 
{2887Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);2888} else if (Name.starts_with("sse2.paddus.") ||2889Name.starts_with("avx2.paddus.") ||2890Name.starts_with("avx512.mask.paddus.")) {2891Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);2892} else if (Name.starts_with("sse2.psubus.") ||2893Name.starts_with("avx2.psubus.") ||2894Name.starts_with("avx512.mask.psubus.")) {2895Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);2896} else if (Name.starts_with("avx512.mask.palignr.")) {2897Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),2898CI->getArgOperand(1), CI->getArgOperand(2),2899CI->getArgOperand(3), CI->getArgOperand(4),2900false);2901} else if (Name.starts_with("avx512.mask.valign.")) {2902Rep = upgradeX86ALIGNIntrinsics(2903Builder, CI->getArgOperand(0), CI->getArgOperand(1),2904CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);2905} else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {2906// 128/256-bit shift left specified in bits.2907unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();2908Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),2909Shift / 8); // Shift is in bits.2910} else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {2911// 128/256-bit shift right specified in bits.2912unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();2913Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),2914Shift / 8); // Shift is in bits.2915} else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||2916Name == "avx512.psll.dq.512") {2917// 128/256/512-bit shift left specified in bytes.2918unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();2919Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);2920} else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||2921Name == "avx512.psrl.dq.512") {2922// 128/256/512-bit shift right specified in 
bytes.2923unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();2924Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);2925} else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||2926Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||2927Name.starts_with("avx2.pblendd.")) {2928Value *Op0 = CI->getArgOperand(0);2929Value *Op1 = CI->getArgOperand(1);2930unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();2931auto *VecTy = cast<FixedVectorType>(CI->getType());2932unsigned NumElts = VecTy->getNumElements();29332934SmallVector<int, 16> Idxs(NumElts);2935for (unsigned i = 0; i != NumElts; ++i)2936Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;29372938Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);2939} else if (Name.starts_with("avx.vinsertf128.") ||2940Name == "avx2.vinserti128" ||2941Name.starts_with("avx512.mask.insert")) {2942Value *Op0 = CI->getArgOperand(0);2943Value *Op1 = CI->getArgOperand(1);2944unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();2945unsigned DstNumElts =2946cast<FixedVectorType>(CI->getType())->getNumElements();2947unsigned SrcNumElts =2948cast<FixedVectorType>(Op1->getType())->getNumElements();2949unsigned Scale = DstNumElts / SrcNumElts;29502951// Mask off the high bits of the immediate value; hardware ignores those.2952Imm = Imm % Scale;29532954// Extend the second operand into a vector the size of the destination.2955SmallVector<int, 8> Idxs(DstNumElts);2956for (unsigned i = 0; i != SrcNumElts; ++i)2957Idxs[i] = i;2958for (unsigned i = SrcNumElts; i != DstNumElts; ++i)2959Idxs[i] = SrcNumElts;2960Rep = Builder.CreateShuffleVector(Op1, Idxs);29612962// Insert the second operand into the first operand.29632964// Note that there is no guarantee that instruction lowering will actually2965// produce a vinsertf128 instruction for the created shuffles. 
In2966// particular, the 0 immediate case involves no lane changes, so it can2967// be handled as a blend.29682969// Example of shuffle mask for 32-bit elements:2970// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>2971// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >29722973// First fill with identify mask.2974for (unsigned i = 0; i != DstNumElts; ++i)2975Idxs[i] = i;2976// Then replace the elements where we need to insert.2977for (unsigned i = 0; i != SrcNumElts; ++i)2978Idxs[i + Imm * SrcNumElts] = i + DstNumElts;2979Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);29802981// If the intrinsic has a mask operand, handle that.2982if (CI->arg_size() == 5)2983Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,2984CI->getArgOperand(3));2985} else if (Name.starts_with("avx.vextractf128.") ||2986Name == "avx2.vextracti128" ||2987Name.starts_with("avx512.mask.vextract")) {2988Value *Op0 = CI->getArgOperand(0);2989unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();2990unsigned DstNumElts =2991cast<FixedVectorType>(CI->getType())->getNumElements();2992unsigned SrcNumElts =2993cast<FixedVectorType>(Op0->getType())->getNumElements();2994unsigned Scale = SrcNumElts / DstNumElts;29952996// Mask off the high bits of the immediate value; hardware ignores those.2997Imm = Imm % Scale;29982999// Get indexes for the subvector of the input vector.3000SmallVector<int, 8> Idxs(DstNumElts);3001for (unsigned i = 0; i != DstNumElts; ++i) {3002Idxs[i] = i + (Imm * DstNumElts);3003}3004Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);30053006// If the intrinsic has a mask operand, handle that.3007if (CI->arg_size() == 4)3008Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,3009CI->getArgOperand(2));3010} else if (Name.starts_with("avx512.mask.perm.df.") ||3011Name.starts_with("avx512.mask.perm.di.")) {3012Value *Op0 = CI->getArgOperand(0);3013unsigned Imm = 
cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();3014auto *VecTy = cast<FixedVectorType>(CI->getType());3015unsigned NumElts = VecTy->getNumElements();30163017SmallVector<int, 8> Idxs(NumElts);3018for (unsigned i = 0; i != NumElts; ++i)3019Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);30203021Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);30223023if (CI->arg_size() == 4)3024Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,3025CI->getArgOperand(2));3026} else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {3027// The immediate permute control byte looks like this:3028// [1:0] - select 128 bits from sources for low half of destination3029// [2] - ignore3030// [3] - zero low half of destination3031// [5:4] - select 128 bits from sources for high half of destination3032// [6] - ignore3033// [7] - zero high half of destination30343035uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();30363037unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();3038unsigned HalfSize = NumElts / 2;3039SmallVector<int, 8> ShuffleMask(NumElts);30403041// Determine which operand(s) are actually in use for this instruction.3042Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);3043Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);30443045// If needed, replace operands based on zero mask.3046V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;3047V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;30483049// Permute low half of result.3050unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;3051for (unsigned i = 0; i < HalfSize; ++i)3052ShuffleMask[i] = StartIndex + i;30533054// Permute high half of result.3055StartIndex = (Imm & 0x10) ? 
HalfSize : 0;3056for (unsigned i = 0; i < HalfSize; ++i)3057ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;30583059Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);30603061} else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||3062Name.starts_with("avx512.mask.vpermil.p") ||3063Name.starts_with("avx512.mask.pshuf.d.")) {3064Value *Op0 = CI->getArgOperand(0);3065unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();3066auto *VecTy = cast<FixedVectorType>(CI->getType());3067unsigned NumElts = VecTy->getNumElements();3068// Calculate the size of each index in the immediate.3069unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();3070unsigned IdxMask = ((1 << IdxSize) - 1);30713072SmallVector<int, 8> Idxs(NumElts);3073// Lookup the bits for this element, wrapping around the immediate every3074// 8-bits. Elements are grouped into sets of 2 or 4 elements so we need3075// to offset by the first index of each group.3076for (unsigned i = 0; i != NumElts; ++i)3077Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);30783079Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);30803081if (CI->arg_size() == 4)3082Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,3083CI->getArgOperand(2));3084} else if (Name == "sse2.pshufl.w" ||3085Name.starts_with("avx512.mask.pshufl.w.")) {3086Value *Op0 = CI->getArgOperand(0);3087unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();3088unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();30893090SmallVector<int, 16> Idxs(NumElts);3091for (unsigned l = 0; l != NumElts; l += 8) {3092for (unsigned i = 0; i != 4; ++i)3093Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;3094for (unsigned i = 4; i != 8; ++i)3095Idxs[i + l] = i + l;3096}30973098Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);30993100if (CI->arg_size() == 4)3101Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,3102CI->getArgOperand(2));3103} else if (Name == "sse2.pshufh.w" 
||3104Name.starts_with("avx512.mask.pshufh.w.")) {3105Value *Op0 = CI->getArgOperand(0);3106unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();3107unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();31083109SmallVector<int, 16> Idxs(NumElts);3110for (unsigned l = 0; l != NumElts; l += 8) {3111for (unsigned i = 0; i != 4; ++i)3112Idxs[i + l] = i + l;3113for (unsigned i = 0; i != 4; ++i)3114Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;3115}31163117Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);31183119if (CI->arg_size() == 4)3120Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,3121CI->getArgOperand(2));3122} else if (Name.starts_with("avx512.mask.shuf.p")) {3123Value *Op0 = CI->getArgOperand(0);3124Value *Op1 = CI->getArgOperand(1);3125unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();3126unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();31273128unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();3129unsigned HalfLaneElts = NumLaneElts / 2;31303131SmallVector<int, 16> Idxs(NumElts);3132for (unsigned i = 0; i != NumElts; ++i) {3133// Base index is the starting element of the lane.3134Idxs[i] = i - (i % NumLaneElts);3135// If we are half way through the lane switch to the other source.3136if ((i % NumLaneElts) >= HalfLaneElts)3137Idxs[i] += NumElts;3138// Now select the specific element. By adding HalfLaneElts bits from3139// the immediate. 
Wrapping around the immediate every 8-bits.3140Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);3141}31423143Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);31443145Rep =3146emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));3147} else if (Name.starts_with("avx512.mask.movddup") ||3148Name.starts_with("avx512.mask.movshdup") ||3149Name.starts_with("avx512.mask.movsldup")) {3150Value *Op0 = CI->getArgOperand(0);3151unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();3152unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();31533154unsigned Offset = 0;3155if (Name.starts_with("avx512.mask.movshdup."))3156Offset = 1;31573158SmallVector<int, 16> Idxs(NumElts);3159for (unsigned l = 0; l != NumElts; l += NumLaneElts)3160for (unsigned i = 0; i != NumLaneElts; i += 2) {3161Idxs[i + l + 0] = i + l + Offset;3162Idxs[i + l + 1] = i + l + Offset;3163}31643165Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);31663167Rep =3168emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));3169} else if (Name.starts_with("avx512.mask.punpckl") ||3170Name.starts_with("avx512.mask.unpckl.")) {3171Value *Op0 = CI->getArgOperand(0);3172Value *Op1 = CI->getArgOperand(1);3173int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();3174int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();31753176SmallVector<int, 64> Idxs(NumElts);3177for (int l = 0; l != NumElts; l += NumLaneElts)3178for (int i = 0; i != NumLaneElts; ++i)3179Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);31803181Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);31823183Rep =3184emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3185} else if (Name.starts_with("avx512.mask.punpckh") ||3186Name.starts_with("avx512.mask.unpckh.")) {3187Value *Op0 = CI->getArgOperand(0);3188Value *Op1 = CI->getArgOperand(1);3189int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();3190int 
NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();31913192SmallVector<int, 64> Idxs(NumElts);3193for (int l = 0; l != NumElts; l += NumLaneElts)3194for (int i = 0; i != NumLaneElts; ++i)3195Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);31963197Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);31983199Rep =3200emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3201} else if (Name.starts_with("avx512.mask.and.") ||3202Name.starts_with("avx512.mask.pand.")) {3203VectorType *FTy = cast<VectorType>(CI->getType());3204VectorType *ITy = VectorType::getInteger(FTy);3205Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),3206Builder.CreateBitCast(CI->getArgOperand(1), ITy));3207Rep = Builder.CreateBitCast(Rep, FTy);3208Rep =3209emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3210} else if (Name.starts_with("avx512.mask.andn.") ||3211Name.starts_with("avx512.mask.pandn.")) {3212VectorType *FTy = cast<VectorType>(CI->getType());3213VectorType *ITy = VectorType::getInteger(FTy);3214Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));3215Rep = Builder.CreateAnd(Rep,3216Builder.CreateBitCast(CI->getArgOperand(1), ITy));3217Rep = Builder.CreateBitCast(Rep, FTy);3218Rep =3219emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3220} else if (Name.starts_with("avx512.mask.or.") ||3221Name.starts_with("avx512.mask.por.")) {3222VectorType *FTy = cast<VectorType>(CI->getType());3223VectorType *ITy = VectorType::getInteger(FTy);3224Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),3225Builder.CreateBitCast(CI->getArgOperand(1), ITy));3226Rep = Builder.CreateBitCast(Rep, FTy);3227Rep =3228emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3229} else if (Name.starts_with("avx512.mask.xor.") ||3230Name.starts_with("avx512.mask.pxor.")) {3231VectorType *FTy = cast<VectorType>(CI->getType());3232VectorType *ITy = 
VectorType::getInteger(FTy);3233Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),3234Builder.CreateBitCast(CI->getArgOperand(1), ITy));3235Rep = Builder.CreateBitCast(Rep, FTy);3236Rep =3237emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3238} else if (Name.starts_with("avx512.mask.padd.")) {3239Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));3240Rep =3241emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3242} else if (Name.starts_with("avx512.mask.psub.")) {3243Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));3244Rep =3245emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3246} else if (Name.starts_with("avx512.mask.pmull.")) {3247Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));3248Rep =3249emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3250} else if (Name.starts_with("avx512.mask.add.p")) {3251if (Name.ends_with(".512")) {3252Intrinsic::ID IID;3253if (Name[17] == 's')3254IID = Intrinsic::x86_avx512_add_ps_512;3255else3256IID = Intrinsic::x86_avx512_add_pd_512;32573258Rep = Builder.CreateCall(3259Intrinsic::getDeclaration(F->getParent(), IID),3260{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});3261} else {3262Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));3263}3264Rep =3265emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3266} else if (Name.starts_with("avx512.mask.div.p")) {3267if (Name.ends_with(".512")) {3268Intrinsic::ID IID;3269if (Name[17] == 's')3270IID = Intrinsic::x86_avx512_div_ps_512;3271else3272IID = Intrinsic::x86_avx512_div_pd_512;32733274Rep = Builder.CreateCall(3275Intrinsic::getDeclaration(F->getParent(), IID),3276{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});3277} else {3278Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));3279}3280Rep =3281emitX86Select(Builder, 
CI->getArgOperand(3), Rep, CI->getArgOperand(2));3282} else if (Name.starts_with("avx512.mask.mul.p")) {3283if (Name.ends_with(".512")) {3284Intrinsic::ID IID;3285if (Name[17] == 's')3286IID = Intrinsic::x86_avx512_mul_ps_512;3287else3288IID = Intrinsic::x86_avx512_mul_pd_512;32893290Rep = Builder.CreateCall(3291Intrinsic::getDeclaration(F->getParent(), IID),3292{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});3293} else {3294Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));3295}3296Rep =3297emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3298} else if (Name.starts_with("avx512.mask.sub.p")) {3299if (Name.ends_with(".512")) {3300Intrinsic::ID IID;3301if (Name[17] == 's')3302IID = Intrinsic::x86_avx512_sub_ps_512;3303else3304IID = Intrinsic::x86_avx512_sub_pd_512;33053306Rep = Builder.CreateCall(3307Intrinsic::getDeclaration(F->getParent(), IID),3308{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});3309} else {3310Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));3311}3312Rep =3313emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3314} else if ((Name.starts_with("avx512.mask.max.p") ||3315Name.starts_with("avx512.mask.min.p")) &&3316Name.drop_front(18) == ".512") {3317bool IsDouble = Name[17] == 'd';3318bool IsMin = Name[13] == 'i';3319static const Intrinsic::ID MinMaxTbl[2][2] = {3320{Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},3321{Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};3322Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];33233324Rep = Builder.CreateCall(3325Intrinsic::getDeclaration(F->getParent(), IID),3326{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});3327Rep =3328emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));3329} else if (Name.starts_with("avx512.mask.lzcnt.")) {3330Rep =3331Builder.CreateCall(Intrinsic::getDeclaration(3332F->getParent(), 
Intrinsic::ctlz, CI->getType()),3333{CI->getArgOperand(0), Builder.getInt1(false)});3334Rep =3335emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));3336} else if (Name.starts_with("avx512.mask.psll")) {3337bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');3338bool IsVariable = Name[16] == 'v';3339char Size = Name[16] == '.' ? Name[17]3340: Name[17] == '.' ? Name[18]3341: Name[18] == '.' ? Name[19]3342: Name[20];33433344Intrinsic::ID IID;3345if (IsVariable && Name[17] != '.') {3346if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di3347IID = Intrinsic::x86_avx2_psllv_q;3348else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di3349IID = Intrinsic::x86_avx2_psllv_q_256;3350else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si3351IID = Intrinsic::x86_avx2_psllv_d;3352else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si3353IID = Intrinsic::x86_avx2_psllv_d_256;3354else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi3355IID = Intrinsic::x86_avx512_psllv_w_128;3356else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi3357IID = Intrinsic::x86_avx512_psllv_w_256;3358else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi3359IID = Intrinsic::x86_avx512_psllv_w_512;3360else3361llvm_unreachable("Unexpected size");3362} else if (Name.ends_with(".128")) {3363if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.1283364IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d3365: Intrinsic::x86_sse2_psll_d;3366else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.1283367IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q3368: Intrinsic::x86_sse2_psll_q;3369else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.1283370IID = IsImmediate ? 
Intrinsic::x86_sse2_pslli_w3371: Intrinsic::x86_sse2_psll_w;3372else3373llvm_unreachable("Unexpected size");3374} else if (Name.ends_with(".256")) {3375if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.2563376IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d3377: Intrinsic::x86_avx2_psll_d;3378else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.2563379IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q3380: Intrinsic::x86_avx2_psll_q;3381else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.2563382IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w3383: Intrinsic::x86_avx2_psll_w;3384else3385llvm_unreachable("Unexpected size");3386} else {3387if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.5123388IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_5123389: IsVariable ? Intrinsic::x86_avx512_psllv_d_5123390: Intrinsic::x86_avx512_psll_d_512;3391else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.5123392IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_5123393: IsVariable ? Intrinsic::x86_avx512_psllv_q_5123394: Intrinsic::x86_avx512_psll_q_512;3395else if (Size == 'w') // psll.wi.512, pslli.w, psll.w3396IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_5123397: Intrinsic::x86_avx512_psll_w_512;3398else3399llvm_unreachable("Unexpected size");3400}34013402Rep = upgradeX86MaskedShift(Builder, *CI, IID);3403} else if (Name.starts_with("avx512.mask.psrl")) {3404bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');3405bool IsVariable = Name[16] == 'v';3406char Size = Name[16] == '.' ? Name[17]3407: Name[17] == '.' ? Name[18]3408: Name[18] == '.' ? 
Name[19]3409: Name[20];34103411Intrinsic::ID IID;3412if (IsVariable && Name[17] != '.') {3413if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di3414IID = Intrinsic::x86_avx2_psrlv_q;3415else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di3416IID = Intrinsic::x86_avx2_psrlv_q_256;3417else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si3418IID = Intrinsic::x86_avx2_psrlv_d;3419else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si3420IID = Intrinsic::x86_avx2_psrlv_d_256;3421else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi3422IID = Intrinsic::x86_avx512_psrlv_w_128;3423else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi3424IID = Intrinsic::x86_avx512_psrlv_w_256;3425else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi3426IID = Intrinsic::x86_avx512_psrlv_w_512;3427else3428llvm_unreachable("Unexpected size");3429} else if (Name.ends_with(".128")) {3430if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.1283431IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d3432: Intrinsic::x86_sse2_psrl_d;3433else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.1283434IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q3435: Intrinsic::x86_sse2_psrl_q;3436else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.1283437IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w3438: Intrinsic::x86_sse2_psrl_w;3439else3440llvm_unreachable("Unexpected size");3441} else if (Name.ends_with(".256")) {3442if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.2563443IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d3444: Intrinsic::x86_avx2_psrl_d;3445else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.2563446IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q3447: Intrinsic::x86_avx2_psrl_q;3448else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.2563449IID = IsImmediate ? 
Intrinsic::x86_avx2_psrli_w3450: Intrinsic::x86_avx2_psrl_w;3451else3452llvm_unreachable("Unexpected size");3453} else {3454if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.5123455IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_5123456: IsVariable ? Intrinsic::x86_avx512_psrlv_d_5123457: Intrinsic::x86_avx512_psrl_d_512;3458else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.5123459IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_5123460: IsVariable ? Intrinsic::x86_avx512_psrlv_q_5123461: Intrinsic::x86_avx512_psrl_q_512;3462else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)3463IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_5123464: Intrinsic::x86_avx512_psrl_w_512;3465else3466llvm_unreachable("Unexpected size");3467}34683469Rep = upgradeX86MaskedShift(Builder, *CI, IID);3470} else if (Name.starts_with("avx512.mask.psra")) {3471bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');3472bool IsVariable = Name[16] == 'v';3473char Size = Name[16] == '.' ? Name[17]3474: Name[17] == '.' ? Name[18]3475: Name[18] == '.' ? Name[19]3476: Name[20];34773478Intrinsic::ID IID;3479if (IsVariable && Name[17] != '.') {3480if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si3481IID = Intrinsic::x86_avx2_psrav_d;3482else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si3483IID = Intrinsic::x86_avx2_psrav_d_256;3484else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi3485IID = Intrinsic::x86_avx512_psrav_w_128;3486else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi3487IID = Intrinsic::x86_avx512_psrav_w_256;3488else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi3489IID = Intrinsic::x86_avx512_psrav_w_512;3490else3491llvm_unreachable("Unexpected size");3492} else if (Name.ends_with(".128")) {3493if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.1283494IID = IsImmediate ? 
Intrinsic::x86_sse2_psrai_d3495: Intrinsic::x86_sse2_psra_d;3496else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.1283497IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_1283498: IsVariable ? Intrinsic::x86_avx512_psrav_q_1283499: Intrinsic::x86_avx512_psra_q_128;3500else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.1283501IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w3502: Intrinsic::x86_sse2_psra_w;3503else3504llvm_unreachable("Unexpected size");3505} else if (Name.ends_with(".256")) {3506if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.2563507IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d3508: Intrinsic::x86_avx2_psra_d;3509else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.2563510IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_2563511: IsVariable ? Intrinsic::x86_avx512_psrav_q_2563512: Intrinsic::x86_avx512_psra_q_256;3513else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.2563514IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w3515: Intrinsic::x86_avx2_psra_w;3516else3517llvm_unreachable("Unexpected size");3518} else {3519if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.5123520IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_5123521: IsVariable ? Intrinsic::x86_avx512_psrav_d_5123522: Intrinsic::x86_avx512_psra_d_512;3523else if (Size == 'q') // psra.qi.512, psrai.q, psra.q3524IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_5123525: IsVariable ? Intrinsic::x86_avx512_psrav_q_5123526: Intrinsic::x86_avx512_psra_q_512;3527else if (Size == 'w') // psra.wi.512, psrai.w, psra.w3528IID = IsImmediate ? 
Intrinsic::x86_avx512_psrai_w_5123529: Intrinsic::x86_avx512_psra_w_512;3530else3531llvm_unreachable("Unexpected size");3532}35333534Rep = upgradeX86MaskedShift(Builder, *CI, IID);3535} else if (Name.starts_with("avx512.mask.move.s")) {3536Rep = upgradeMaskedMove(Builder, *CI);3537} else if (Name.starts_with("avx512.cvtmask2")) {3538Rep = upgradeMaskToInt(Builder, *CI);3539} else if (Name.ends_with(".movntdqa")) {3540MDNode *Node = MDNode::get(3541C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));35423543Value *Ptr = CI->getArgOperand(0);35443545// Convert the type of the pointer to a pointer to the stored type.3546Value *BC = Builder.CreateBitCast(3547Ptr, PointerType::getUnqual(CI->getType()), "cast");3548LoadInst *LI = Builder.CreateAlignedLoad(3549CI->getType(), BC,3550Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));3551LI->setMetadata(LLVMContext::MD_nontemporal, Node);3552Rep = LI;3553} else if (Name.starts_with("fma.vfmadd.") ||3554Name.starts_with("fma.vfmsub.") ||3555Name.starts_with("fma.vfnmadd.") ||3556Name.starts_with("fma.vfnmsub.")) {3557bool NegMul = Name[6] == 'n';3558bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';3559bool IsScalar = NegMul ? 
Name[12] == 's' : Name[11] == 's';35603561Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),3562CI->getArgOperand(2)};35633564if (IsScalar) {3565Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);3566Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);3567Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);3568}35693570if (NegMul && !IsScalar)3571Ops[0] = Builder.CreateFNeg(Ops[0]);3572if (NegMul && IsScalar)3573Ops[1] = Builder.CreateFNeg(Ops[1]);3574if (NegAcc)3575Ops[2] = Builder.CreateFNeg(Ops[2]);35763577Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),3578Intrinsic::fma,3579Ops[0]->getType()),3580Ops);35813582if (IsScalar)3583Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);3584} else if (Name.starts_with("fma4.vfmadd.s")) {3585Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),3586CI->getArgOperand(2)};35873588Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);3589Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);3590Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);35913592Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),3593Intrinsic::fma,3594Ops[0]->getType()),3595Ops);35963597Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),3598Rep, (uint64_t)0);3599} else if (Name.starts_with("avx512.mask.vfmadd.s") ||3600Name.starts_with("avx512.maskz.vfmadd.s") ||3601Name.starts_with("avx512.mask3.vfmadd.s") ||3602Name.starts_with("avx512.mask3.vfmsub.s") ||3603Name.starts_with("avx512.mask3.vfnmsub.s")) {3604bool IsMask3 = Name[11] == '3';3605bool IsMaskZ = Name[11] == 'z';3606// Drop the "avx512.mask." to make it easier.3607Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);3608bool NegMul = Name[2] == 'n';3609bool NegAcc = NegMul ? 
Name[4] == 's' : Name[3] == 's';36103611Value *A = CI->getArgOperand(0);3612Value *B = CI->getArgOperand(1);3613Value *C = CI->getArgOperand(2);36143615if (NegMul && (IsMask3 || IsMaskZ))3616A = Builder.CreateFNeg(A);3617if (NegMul && !(IsMask3 || IsMaskZ))3618B = Builder.CreateFNeg(B);3619if (NegAcc)3620C = Builder.CreateFNeg(C);36213622A = Builder.CreateExtractElement(A, (uint64_t)0);3623B = Builder.CreateExtractElement(B, (uint64_t)0);3624C = Builder.CreateExtractElement(C, (uint64_t)0);36253626if (!isa<ConstantInt>(CI->getArgOperand(4)) ||3627cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {3628Value *Ops[] = {A, B, C, CI->getArgOperand(4)};36293630Intrinsic::ID IID;3631if (Name.back() == 'd')3632IID = Intrinsic::x86_avx512_vfmadd_f64;3633else3634IID = Intrinsic::x86_avx512_vfmadd_f32;3635Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);3636Rep = Builder.CreateCall(FMA, Ops);3637} else {3638Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,3639A->getType());3640Rep = Builder.CreateCall(FMA, {A, B, C});3641}36423643Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())3644: IsMask3 ? C3645: A;36463647// For Mask3 with NegAcc, we need to create a new extractelement that3648// avoids the negation above.3649if (NegAcc && IsMask3)3650PassThru =3651Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);36523653Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);3654Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 
2 : 0), Rep,3655(uint64_t)0);3656} else if (Name.starts_with("avx512.mask.vfmadd.p") ||3657Name.starts_with("avx512.mask.vfnmadd.p") ||3658Name.starts_with("avx512.mask.vfnmsub.p") ||3659Name.starts_with("avx512.mask3.vfmadd.p") ||3660Name.starts_with("avx512.mask3.vfmsub.p") ||3661Name.starts_with("avx512.mask3.vfnmsub.p") ||3662Name.starts_with("avx512.maskz.vfmadd.p")) {3663bool IsMask3 = Name[11] == '3';3664bool IsMaskZ = Name[11] == 'z';3665// Drop the "avx512.mask." to make it easier.3666Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);3667bool NegMul = Name[2] == 'n';3668bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';36693670Value *A = CI->getArgOperand(0);3671Value *B = CI->getArgOperand(1);3672Value *C = CI->getArgOperand(2);36733674if (NegMul && (IsMask3 || IsMaskZ))3675A = Builder.CreateFNeg(A);3676if (NegMul && !(IsMask3 || IsMaskZ))3677B = Builder.CreateFNeg(B);3678if (NegAcc)3679C = Builder.CreateFNeg(C);36803681if (CI->arg_size() == 5 &&3682(!isa<ConstantInt>(CI->getArgOperand(4)) ||3683cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {3684Intrinsic::ID IID;3685// Check the character before ".512" in string.3686if (Name[Name.size() - 5] == 's')3687IID = Intrinsic::x86_avx512_vfmadd_ps_512;3688else3689IID = Intrinsic::x86_avx512_vfmadd_pd_512;36903691Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),3692{A, B, C, CI->getArgOperand(4)});3693} else {3694Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,3695A->getType());3696Rep = Builder.CreateCall(FMA, {A, B, C});3697}36983699Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())3700: IsMask3 ? 
CI->getArgOperand(2)3701: CI->getArgOperand(0);37023703Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);3704} else if (Name.starts_with("fma.vfmsubadd.p")) {3705unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();3706unsigned EltWidth = CI->getType()->getScalarSizeInBits();3707Intrinsic::ID IID;3708if (VecWidth == 128 && EltWidth == 32)3709IID = Intrinsic::x86_fma_vfmaddsub_ps;3710else if (VecWidth == 256 && EltWidth == 32)3711IID = Intrinsic::x86_fma_vfmaddsub_ps_256;3712else if (VecWidth == 128 && EltWidth == 64)3713IID = Intrinsic::x86_fma_vfmaddsub_pd;3714else if (VecWidth == 256 && EltWidth == 64)3715IID = Intrinsic::x86_fma_vfmaddsub_pd_256;3716else3717llvm_unreachable("Unexpected intrinsic");37183719Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),3720CI->getArgOperand(2)};3721Ops[2] = Builder.CreateFNeg(Ops[2]);3722Rep =3723Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), Ops);3724} else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||3725Name.starts_with("avx512.mask3.vfmaddsub.p") ||3726Name.starts_with("avx512.maskz.vfmaddsub.p") ||3727Name.starts_with("avx512.mask3.vfmsubadd.p")) {3728bool IsMask3 = Name[11] == '3';3729bool IsMaskZ = Name[11] == 'z';3730// Drop the "avx512.mask." to make it easier.3731Name = Name.drop_front(IsMask3 || IsMaskZ ? 
13 : 12);3732bool IsSubAdd = Name[3] == 's';3733if (CI->arg_size() == 5) {3734Intrinsic::ID IID;3735// Check the character before ".512" in string.3736if (Name[Name.size() - 5] == 's')3737IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;3738else3739IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;37403741Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),3742CI->getArgOperand(2), CI->getArgOperand(4)};3743if (IsSubAdd)3744Ops[2] = Builder.CreateFNeg(Ops[2]);37453746Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),3747Ops);3748} else {3749int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();37503751Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),3752CI->getArgOperand(2)};37533754Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,3755Ops[0]->getType());3756Value *Odd = Builder.CreateCall(FMA, Ops);3757Ops[2] = Builder.CreateFNeg(Ops[2]);3758Value *Even = Builder.CreateCall(FMA, Ops);37593760if (IsSubAdd)3761std::swap(Even, Odd);37623763SmallVector<int, 32> Idxs(NumElts);3764for (int i = 0; i != NumElts; ++i)3765Idxs[i] = i + (i % 2) * NumElts;37663767Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);3768}37693770Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())3771: IsMask3 ? 
CI->getArgOperand(2)3772: CI->getArgOperand(0);37733774Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);3775} else if (Name.starts_with("avx512.mask.pternlog.") ||3776Name.starts_with("avx512.maskz.pternlog.")) {3777bool ZeroMask = Name[11] == 'z';3778unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();3779unsigned EltWidth = CI->getType()->getScalarSizeInBits();3780Intrinsic::ID IID;3781if (VecWidth == 128 && EltWidth == 32)3782IID = Intrinsic::x86_avx512_pternlog_d_128;3783else if (VecWidth == 256 && EltWidth == 32)3784IID = Intrinsic::x86_avx512_pternlog_d_256;3785else if (VecWidth == 512 && EltWidth == 32)3786IID = Intrinsic::x86_avx512_pternlog_d_512;3787else if (VecWidth == 128 && EltWidth == 64)3788IID = Intrinsic::x86_avx512_pternlog_q_128;3789else if (VecWidth == 256 && EltWidth == 64)3790IID = Intrinsic::x86_avx512_pternlog_q_256;3791else if (VecWidth == 512 && EltWidth == 64)3792IID = Intrinsic::x86_avx512_pternlog_q_512;3793else3794llvm_unreachable("Unexpected intrinsic");37953796Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),3797CI->getArgOperand(2), CI->getArgOperand(3)};3798Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),3799Args);3800Value *PassThru = ZeroMask ? 
ConstantAggregateZero::get(CI->getType())3801: CI->getArgOperand(0);3802Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);3803} else if (Name.starts_with("avx512.mask.vpmadd52") ||3804Name.starts_with("avx512.maskz.vpmadd52")) {3805bool ZeroMask = Name[11] == 'z';3806bool High = Name[20] == 'h' || Name[21] == 'h';3807unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();3808Intrinsic::ID IID;3809if (VecWidth == 128 && !High)3810IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;3811else if (VecWidth == 256 && !High)3812IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;3813else if (VecWidth == 512 && !High)3814IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;3815else if (VecWidth == 128 && High)3816IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;3817else if (VecWidth == 256 && High)3818IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;3819else if (VecWidth == 512 && High)3820IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;3821else3822llvm_unreachable("Unexpected intrinsic");38233824Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),3825CI->getArgOperand(2)};3826Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),3827Args);3828Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())3829: CI->getArgOperand(0);3830Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);3831} else if (Name.starts_with("avx512.mask.vpermi2var.") ||3832Name.starts_with("avx512.mask.vpermt2var.") ||3833Name.starts_with("avx512.maskz.vpermt2var.")) {3834bool ZeroMask = Name[11] == 'z';3835bool IndexForm = Name[17] == 'i';3836Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);3837} else if (Name.starts_with("avx512.mask.vpdpbusd.") ||3838Name.starts_with("avx512.maskz.vpdpbusd.") ||3839Name.starts_with("avx512.mask.vpdpbusds.") ||3840Name.starts_with("avx512.maskz.vpdpbusds.")) {3841bool ZeroMask = Name[11] == 'z';3842bool IsSaturating = Name[ZeroMask ? 
21 : 20] == 's';3843unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();3844Intrinsic::ID IID;3845if (VecWidth == 128 && !IsSaturating)3846IID = Intrinsic::x86_avx512_vpdpbusd_128;3847else if (VecWidth == 256 && !IsSaturating)3848IID = Intrinsic::x86_avx512_vpdpbusd_256;3849else if (VecWidth == 512 && !IsSaturating)3850IID = Intrinsic::x86_avx512_vpdpbusd_512;3851else if (VecWidth == 128 && IsSaturating)3852IID = Intrinsic::x86_avx512_vpdpbusds_128;3853else if (VecWidth == 256 && IsSaturating)3854IID = Intrinsic::x86_avx512_vpdpbusds_256;3855else if (VecWidth == 512 && IsSaturating)3856IID = Intrinsic::x86_avx512_vpdpbusds_512;3857else3858llvm_unreachable("Unexpected intrinsic");38593860Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),3861CI->getArgOperand(2)};3862Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),3863Args);3864Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())3865: CI->getArgOperand(0);3866Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);3867} else if (Name.starts_with("avx512.mask.vpdpwssd.") ||3868Name.starts_with("avx512.maskz.vpdpwssd.") ||3869Name.starts_with("avx512.mask.vpdpwssds.") ||3870Name.starts_with("avx512.maskz.vpdpwssds.")) {3871bool ZeroMask = Name[11] == 'z';3872bool IsSaturating = Name[ZeroMask ? 
21 : 20] == 's';3873unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();3874Intrinsic::ID IID;3875if (VecWidth == 128 && !IsSaturating)3876IID = Intrinsic::x86_avx512_vpdpwssd_128;3877else if (VecWidth == 256 && !IsSaturating)3878IID = Intrinsic::x86_avx512_vpdpwssd_256;3879else if (VecWidth == 512 && !IsSaturating)3880IID = Intrinsic::x86_avx512_vpdpwssd_512;3881else if (VecWidth == 128 && IsSaturating)3882IID = Intrinsic::x86_avx512_vpdpwssds_128;3883else if (VecWidth == 256 && IsSaturating)3884IID = Intrinsic::x86_avx512_vpdpwssds_256;3885else if (VecWidth == 512 && IsSaturating)3886IID = Intrinsic::x86_avx512_vpdpwssds_512;3887else3888llvm_unreachable("Unexpected intrinsic");38893890Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),3891CI->getArgOperand(2)};3892Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),3893Args);3894Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())3895: CI->getArgOperand(0);3896Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);3897} else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||3898Name == "addcarry.u32" || Name == "addcarry.u64" ||3899Name == "subborrow.u32" || Name == "subborrow.u64") {3900Intrinsic::ID IID;3901if (Name[0] == 'a' && Name.back() == '2')3902IID = Intrinsic::x86_addcarry_32;3903else if (Name[0] == 'a' && Name.back() == '4')3904IID = Intrinsic::x86_addcarry_64;3905else if (Name[0] == 's' && Name.back() == '2')3906IID = Intrinsic::x86_subborrow_32;3907else if (Name[0] == 's' && Name.back() == '4')3908IID = Intrinsic::x86_subborrow_64;3909else3910llvm_unreachable("Unexpected intrinsic");39113912// Make a call with 3 operands.3913Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),3914CI->getArgOperand(2)};3915Value *NewCall = Builder.CreateCall(3916Intrinsic::getDeclaration(CI->getModule(), IID), Args);39173918// Extract the second result and store it.3919Value *Data = Builder.CreateExtractValue(NewCall, 1);3920// Cast the 
pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(
        CI->getArgOperand(3), llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, Align(1));
    // Replace the original call result with the first result of the new call.
    Value *CF = Builder.CreateExtractValue(NewCall, 0);

    CI->replaceAllUsesWith(CF);
    Rep = nullptr;
  } else if (Name.starts_with("avx512.mask.") &&
             upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
    // Rep will be updated by the call in the condition.
  }

  return Rep;
}

/// Upgrade deprecated ARM (MVE/CDE) intrinsic calls that were defined against
/// the old v4i1 predicate type for 64-bit element vectors; the current
/// intrinsics use v2i1 instead. Returns the replacement value.
/// NOTE(review): only the names matched below are handled; any other name
/// reaching this function is a caller error (see llvm_unreachable at the end).
static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateCall(
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
        CI->getArgOperand(0), CI->getName());
    // Round-trip the v2i1 result through the predicate<->integer conversion
    // intrinsics to produce the v4i1 type the old callers expect.
    Value *C1 = Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 2, false)}),
        VCTP);
    return Builder.CreateCall(
        Intrinsic::getDeclaration(
            F->getParent(), Intrinsic::arm_mve_pred_i2v,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}),
        C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // Rebuild the call against the current intrinsic signature, substituting
    // v2i1 for the old v4i1 predicate in the overloaded type list.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Each intrinsic family overloads on a different subset of its
    // operand/result types; pick the matching type list.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Convert any i1-vector (predicate) argument from the old v4i1 form to
    // v2i1 via the predicate<->integer conversion intrinsics; all other
    // arguments pass through unchanged.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateCall(
            Intrinsic::getDeclaration(
                F->getParent(), Intrinsic::arm_mve_pred_v2i,
                {VectorType::get(Builder.getInt1Ty(), 4, false)}),
            Op);
        Op = Builder.CreateCall(
            Intrinsic::getDeclaration(F->getParent(),
                                      Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
            C1);
      }
      Ops.push_back(Op);
    }

    Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
    return Builder.CreateCall(Fn, Ops, CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}

// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
// Upgrades the legacy AMDGPU atomic intrinsics to a native atomicrmw
// instruction. Returns nullptr on malformed input (wrong operand count or
// types), in which case no replacement is made.
static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
                                         Function *F, IRBuilder<> &Builder) {
  // Map the intrinsic name prefix onto the corresponding atomicrmw operation.
  AtomicRMWInst::BinOp RMWOp =
      StringSwitch<AtomicRMWInst::BinOp>(Name)
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    // Non-constant or non-zero volatile argument conservatively means
    // volatile.
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // Default to seq_cst; also strengthen not-atomic/unordered (which are not
  // valid for atomicrmw) to seq_cst.
  AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::SequentiallyConsistent;

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  // Address space 3 is LDS/local here; everything else gets the metadata.
  // NOTE(review): assumes 3 == AMDGPU local address space -- confirm against
  // the target's address-space map.
  if (PtrTy->getAddressSpace() != 3) {
    RMW->setMetadata("amdgpu.no.fine.grained.memory",
                     MDNode::get(F->getContext(), {}));
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case the value was bitcast to bfloat above.
  return Builder.CreateBitCast(RMW, RetTy);
}

/// Helper to unwrap intrinsic call MetadataAsValue operands.
/// Returns nullptr if operand \p Op is not MetadataAsValue wrapping an MDType.
template <typename MDType>
static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) {
  if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
    return dyn_cast<MDType>(MAV->getMetadata());
  return nullptr;
}

/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc());
  } else if (Name == "assign") {
    // dbg.assign: (value, variable, expression, assign-id, address,
    // address-expression).
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
        unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3),
        unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5),
        CI->getDebugLoc());
  } else if (Name == "declare") {
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1),
        unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(),
        DbgVariableRecord::LocationType::Declare);
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2);
    Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
    DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0),
                               unwrapMAVOp<DILocalVariable>(CI, 1), Expr,
                               CI->getDebugLoc());
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1));
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isZeroValue())
        return;
      // Skip over the offset: variable and expression shift by one.
      VarOp = 2;
      ExprOp = 3;
    }
    DR = new DbgVariableRecord(
        unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp),
        unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc());
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}

/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
  // checks the callee's function type matches.
It's likely we need to handle
  // type changes here.
  Function *F = dyn_cast<Function>(CI->getCalledOperand());
  if (!F)
    return;

  LLVMContext &C = CI->getContext();
  IRBuilder<> Builder(C);
  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());

  // NewFn == null means there is no one-to-one replacement declaration: the
  // old call is rewritten inline below (or simply removed) instead of being
  // retargeted at a new intrinsic.
  if (!NewFn) {
    bool FallthroughToDefaultUpgrade = false;
    // Get the Function's name.
    StringRef Name = F->getName();

    assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
    Name = Name.substr(5);

    // Strip the target prefix so each per-target helper sees a bare name.
    bool IsX86 = Name.consume_front("x86.");
    bool IsNVVM = Name.consume_front("nvvm.");
    bool IsARM = Name.consume_front("arm.");
    bool IsAMDGCN = Name.consume_front("amdgcn.");
    bool IsDbg = Name.consume_front("dbg.");
    Value *Rep = nullptr;

    if (!IsX86 && Name == "stackprotectorcheck") {
      // Dropped entirely: Rep stays null, so the call is erased below with
      // no replacement value.
      Rep = nullptr;
    } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
      // abs(x) expanded as select(x >= 0, x, -x).
      Value *Arg = CI->getArgOperand(0);
      Value *Neg = Builder.CreateNeg(Arg, "neg");
      Value *Cmp = Builder.CreateICmpSGE(
          Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
      Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
    } else if (IsNVVM && (Name.starts_with("atomic.load.add.f32.p") ||
                          Name.starts_with("atomic.load.add.f64.p"))) {
      Value *Ptr = CI->getArgOperand(0);
      Value *Val = CI->getArgOperand(1);
      Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                    AtomicOrdering::SequentiallyConsistent);
    } else if (IsNVVM && Name.consume_front("max.") &&
               (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                Name == "ui" || Name == "ull")) {
      // max expanded as select(cmp, a, b); the 'u' prefix selects unsigned.
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.starts_with("u")
                       ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                       : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
    } else if (IsNVVM && Name.consume_front("min.") &&
               (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                Name == "ui" || Name == "ull")) {
      Value *Arg0 = CI->getArgOperand(0);
      Value *Arg1 = CI->getArgOperand(1);
      Value *Cmp = Name.starts_with("u")
                       ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                       : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
      Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
    } else if (IsNVVM && Name == "clz.ll") {
      // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Ctlz = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
                                    {Arg->getType()}),
          {Arg, Builder.getFalse()}, "ctlz");
      Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
    } else if (IsNVVM && Name == "popc.ll") {
      // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
      // i64.
      Value *Arg = CI->getArgOperand(0);
      Value *Popc = Builder.CreateCall(
          Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
                                    {Arg->getType()}),
          Arg, "ctpop");
      Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
    } else if (IsNVVM) {
      if (Name == "h2f") {
        Rep =
            Builder.CreateCall(Intrinsic::getDeclaration(
                                   F->getParent(), Intrinsic::convert_from_fp16,
                                   {Builder.getFloatTy()}),
                               CI->getArgOperand(0), "h2f");
      } else {
        // Legacy NVPTX bf16 intrinsics used integer types; bitcast each
        // integer argument to the new bf16 parameter type and the result
        // back to the old integer return type.
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic &&
            !F->getReturnType()->getScalarType()->isBFloatTy()) {
          rename(F);
          NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
          SmallVector<Value *, 2> Args;
          for (size_t I = 0; I < NewFn->arg_size(); ++I) {
            Value *Arg = CI->getArgOperand(I);
            Type *OldType = Arg->getType();
            Type *NewType = NewFn->getArg(I)->getType();
            Args.push_back((OldType->isIntegerTy() &&
                            NewType->getScalarType()->isBFloatTy())
                               ? Builder.CreateBitCast(Arg, NewType)
                               : Arg);
          }
          Rep = Builder.CreateCall(NewFn, Args);
          if (F->getReturnType()->isIntegerTy())
            Rep = Builder.CreateBitCast(Rep, F->getReturnType());
        }
      }
    } else if (IsX86) {
      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
    } else if (IsARM) {
      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
    } else if (IsAMDGCN) {
      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
    } else if (IsDbg) {
      // We might have decided we don't want the new format after all between
      // first requesting the upgrade and now; skip the conversion if that is
      // the case, and check here to see if the intrinsic needs to be upgraded
      // normally.
      if (!CI->getModule()->IsNewDbgInfoFormat) {
        bool NeedsUpgrade =
            upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false);
        if (!NeedsUpgrade)
          return;
        FallthroughToDefaultUpgrade = true;
      } else {
        upgradeDbgIntrinsicToDbgRecord(Name, CI);
      }
    } else {
      llvm_unreachable("Unknown function for CallBase upgrade.");
    }

    if (!FallthroughToDefaultUpgrade) {
      // Replace uses (when a replacement value was built) and drop the call.
      if (Rep)
        CI->replaceAllUsesWith(Rep);
      CI->eraseFromParent();
      return;
    }
  }

  // Shared fallback for cases below: retarget a signature-compatible call,
  // rebuild a named-struct result from a literal-struct one, or hand a
  // deliberately bogus call to the verifier.
  const auto &DefaultCase = [&]() -> void {
    if (CI->getFunctionType() == NewFn->getFunctionType()) {
      // Handle generic mangling change.
      assert(
          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
          "Unknown function for CallBase upgrade and isn't just a name change");
      CI->setCalledFunction(NewFn);
      return;
    }

    // This must be an upgrade from a named to a literal struct.
    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
      assert(OldST != NewFn->getReturnType() &&
             "Return type must have changed");
      assert(OldST->getNumElements() ==
                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
             "Must have same number of elements");

      SmallVector<Value *> Args(CI->args());
      Value *NewCI = Builder.CreateCall(NewFn, Args);
      Value *Res = PoisonValue::get(OldST);
      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
        Res = Builder.CreateInsertValue(Res, Elem, Idx);
      }
      CI->replaceAllUsesWith(Res);
      CI->eraseFromParent();
      return;
    }

    // We're probably about to produce something invalid. Let the verifier catch
    // it instead of dying here.
    CI->setCalledOperand(
        ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
    return;
  };
  CallInst *NewCall = nullptr;
  // Each case adapts arguments/results between the old and new signatures.
  // Cases that `break` fall out to the common takeName/RAUW/erase epilogue;
  // cases that `return` have already done their own replacement.
  switch (NewFn->getIntrinsicID()) {
  default: {
    DefaultCase();
    return;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    SmallVector<Value *, 4> Args(CI->args());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // The lane immediate (operand 3) changed type; rebuild it as i32.
    LLVMContext &Ctx = F->getParent()->getContext();
    SmallVector<Value *, 4> Args(CI->args());
    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
                               cast<ConstantInt>(Args[3])->getZExtValue());
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld2_sret: {
    // Old ldN returned one wide vector; new ldN_sret returns a struct of N
    // parts, which are re-packed into the wide vector expected by callers.
    StringRef Name = F->getName();
    Name = Name.substr(5);
    unsigned N = StringSwitch<unsigned>(Name)
                     .StartsWith("aarch64.sve.ld2", 2)
                     .StartsWith("aarch64.sve.ld3", 3)
                     .StartsWith("aarch64.sve.ld4", 4)
                     .Default(0);
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements() / N;
    SmallVector<Value *, 2> Args(CI->args());
    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
    Value *Ret = llvm::PoisonValue::get(RetTy);
    for (unsigned I = 0; I < N; I++) {
      Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx);
    }
    NewCall = dyn_cast<CallInst>(Ret);
    break;
  }

  case Intrinsic::coro_end: {
    // New coro.end takes an extra trailing token argument; pass 'none'.
    SmallVector<Value *, 3> Args(CI->args());
    Args.push_back(ConstantTokenNone::get(CI->getContext()));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::vector_extract: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm
    if (!Name.starts_with("aarch64.sve.tuple.get")) {
      DefaultCase();
      return;
    }
    // tuple.get's element index scales into a vector_extract element offset.
    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
    unsigned MinElts = RetTy->getMinNumElements();
    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
    break;
  }

  case Intrinsic::vector_insert: {
    StringRef Name = F->getName();
    Name = Name.substr(5);
    if (!Name.starts_with("aarch64.sve.tuple")) {
      DefaultCase();
      return;
    }
    if (Name.starts_with("aarch64.sve.tuple.set")) {
      // tuple.set(tuple, i, vec) -> vector_insert at element offset i*len.
      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
      Value *NewIdx =
          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
      NewCall = Builder.CreateCall(
          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
      break;
    }
    if (Name.starts_with("aarch64.sve.tuple.create")) {
      // tuple.createN packs N vectors into one wide vector via repeated
      // vector_insert; the final insert becomes NewCall.
      unsigned N = StringSwitch<unsigned>(Name)
                       .StartsWith("aarch64.sve.tuple.create2", 2)
                       .StartsWith("aarch64.sve.tuple.create3", 3)
                       .StartsWith("aarch64.sve.tuple.create4", 4)
                       .Default(0);
      assert(N > 1 && "Create is expected to be between 2-4");
      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
      Value *Ret = llvm::PoisonValue::get(RetTy);
      unsigned MinElts = RetTy->getMinNumElements() / N;
      for (unsigned I = 0; I < N; I++) {
        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
        Value *V = CI->getArgOperand(I);
        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
      }
      NewCall = dyn_cast<CallInst>(Ret);
    }
    break;
  }

  case Intrinsic::arm_neon_bfdot:
  case Intrinsic::arm_neon_bfmmla:
  case Intrinsic::arm_neon_bfmlalb:
  case Intrinsic::arm_neon_bfmlalt:
  case Intrinsic::aarch64_neon_bfdot:
  case Intrinsic::aarch64_neon_bfmmla:
  case Intrinsic::aarch64_neon_bfmlalb:
  case Intrinsic::aarch64_neon_bfmlalt: {
    // Operands 1 and 2 were integer vectors; bitcast them to bf16 vectors
    // of matching total width (64 or 128 bits / 16 bits per element).
    SmallVector<Value *, 3> Args;
    assert(CI->arg_size() == 3 &&
           "Mismatch between function args and call args");
    size_t OperandWidth =
        CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
    assert((OperandWidth == 64 || OperandWidth == 128) &&
           "Unexpected operand width");
    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
    auto Iter = CI->args().begin();
    Args.push_back(*Iter++);
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::bitreverse:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::ctlz:
  case Intrinsic::cttz:
    // New form takes an is_zero_poison flag; old single-arg form implied
    // a defined result for zero, hence 'false'.
    assert(CI->arg_size() == 1 &&
           "Mismatch between function args and call args");
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
    break;

  case Intrinsic::objectsize: {
    // Fill in defaults for the flags older forms lacked.
    Value *NullIsUnknownSize =
        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
    Value *Dynamic =
        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
    NewCall = Builder.CreateCall(
        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
    break;
  }

  case Intrinsic::ctpop:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::convert_from_fp16:
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
    break;

  case Intrinsic::dbg_value: {
    StringRef Name = F->getName();
    Name = Name.substr(5); // Strip llvm.
    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
    if (Name.starts_with("dbg.addr")) {
      DIExpression *Expr = cast<DIExpression>(
          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
      NewCall =
          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                     MetadataAsValue::get(C, Expr)});
      break;
    }

    // Upgrade from the old version that had an extra offset argument.
    assert(CI->arg_size() == 4);
    // Drop nonzero offsets instead of attempting to upgrade them.
    if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
      if (Offset->isZeroValue()) {
        NewCall = Builder.CreateCall(
            NewFn,
            {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
        break;
      }
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::ptr_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }

    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::var_annotation:
    // Upgrade from versions that lacked the annotation attribute argument.
    if (CI->arg_size() != 4) {
      DefaultCase();
      return;
    }
    // Create a new call with an added null annotation attribute argument.
    NewCall =
        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
                                   CI->getArgOperand(2), CI->getArgOperand(3),
                                   Constant::getNullValue(Builder.getPtrTy())});
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(NewCall);
    CI->eraseFromParent();
    return;

  case Intrinsic::riscv_aes32dsi:
  case Intrinsic::riscv_aes32dsmi:
  case Intrinsic::riscv_aes32esi:
  case Intrinsic::riscv_aes32esmi:
  case Intrinsic::riscv_sm4ks:
  case Intrinsic::riscv_sm4ed: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    Value *Arg2 = CI->getArgOperand(2);
    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
      return;

    // RV64 callers used i64; the new intrinsic operates on i32, so truncate
    // inputs and sign-extend the result back below.
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    if (CI->getType()->isIntegerTy(64)) {
      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
    }

    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
                            cast<ConstantInt>(Arg2)->getZExtValue());

    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
    Value *Res = NewCall;
    if (Res->getType() != CI->getType())
      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::riscv_sha256sig0:
  case Intrinsic::riscv_sha256sig1:
  case Intrinsic::riscv_sha256sum0:
  case Intrinsic::riscv_sha256sum1:
  case Intrinsic::riscv_sm3p0:
  case Intrinsic::riscv_sm3p1: {
    // The last argument to these intrinsics used to be i8 and changed to i32.
    // The type overload for sm4ks and sm4ed was removed.
    if (!CI->getType()->isIntegerTy(64))
      return;

    Value *Arg =
        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());

    NewCall = Builder.CreateCall(NewFn, Arg);
    Value *Res =
        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_xop_vfrcz_ss:
  case Intrinsic::x86_xop_vfrcz_sd:
    // Old form took two operands; only the second is meaningful now.
    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
    break;

  case Intrinsic::x86_xop_vpermil2pd:
  case Intrinsic::x86_xop_vpermil2ps:
  case Intrinsic::x86_xop_vpermil2pd_256:
  case Intrinsic::x86_xop_vpermil2ps_256: {
    // The selector operand changed from an FP vector to the same-width
    // integer vector.
    SmallVector<Value *, 4> Args(CI->args());
    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestnzc: {
    // The arguments for these intrinsics used to be v4f32, and changed
    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
    // So, the only thing required is a bitcast for both arguments.
    // First, check the arguments have the old type.
    Value *Arg0 = CI->getArgOperand(0);
    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
      return;

    // Old intrinsic, add bitcasts
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);

    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");

    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
    break;
  }

  case Intrinsic::x86_rdtscp: {
    // This used to take 1 arguments. If we have no arguments, it is already
    // upgraded.
    if (CI->getNumOperands() == 0)
      return;

    NewCall = Builder.CreateCall(NewFn);
    // Extract the second result and store it.
    Value *Data = Builder.CreateExtractValue(NewCall, 1);
    // Cast the pointer to the right type.
    Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
                                 llvm::PointerType::getUnqual(Data->getType()));
    Builder.CreateAlignedStore(Data, Ptr, Align(1));
    // Replace the original call result with the first result of the new call.
    Value *TSC = Builder.CreateExtractValue(NewCall, 0);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(TSC);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_sse41_insertps:
  case Intrinsic::x86_sse41_dppd:
  case Intrinsic::x86_sse41_dpps:
  case Intrinsic::x86_sse41_mpsadbw:
  case Intrinsic::x86_avx_dp_ps_256:
  case Intrinsic::x86_avx2_mpsadbw: {
    // Need to truncate the last argument from i32 to i8 -- this argument models
    // an inherently 8-bit immediate operand to these x86 instructions.
    SmallVector<Value *, 4> Args(CI->args());

    // Replace the last argument with a trunc.
    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::x86_avx512_mask_cmp_pd_128:
  case Intrinsic::x86_avx512_mask_cmp_pd_256:
  case Intrinsic::x86_avx512_mask_cmp_pd_512:
  case Intrinsic::x86_avx512_mask_cmp_ps_128:
  case Intrinsic::x86_avx512_mask_cmp_ps_256:
  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    // The mask operand changed from a scalar integer to a <N x i1> vector;
    // convert it in, and widen the i1-vector result back out.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }

  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // Result (and for the masked form, the passthru operand) changed from
    // i16 vectors to bf16 vectors; bridge both directions with bitcasts.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
    if (NewFn->getIntrinsicID() ==
        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
      Args[1] = Builder.CreateBitCast(
          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    Value *Res = Builder.CreateBitCast(
        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));

    NewCall->takeName(CI);
    CI->replaceAllUsesWith(Res);
    CI->eraseFromParent();
    return;
  }
  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
    // Inputs are twice as many bf16 elements as the f32 result has.
    SmallVector<Value *, 4> Args(CI->args());
    unsigned NumElts =
        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
    Args[1] = Builder.CreateBitCast(
        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
    Args[2] = Builder.CreateBitCast(
        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));

    NewCall = Builder.CreateCall(NewFn, Args);
    break;
  }

  case Intrinsic::thread_pointer: {
    NewCall = Builder.CreateCall(NewFn, {});
    break;
  }

  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset: {
    // We have to make sure that the call signature is what we're expecting.
    // We only want to change the old signatures by removing the alignment arg:
    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
    // Note: i8*'s in the above can be any pointer type
    if (CI->arg_size() != 5) {
      DefaultCase();
      return;
    }
    // Remove alignment argument (3), and add alignment attributes to the
    // dest/src pointers.
    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
                      CI->getArgOperand(2), CI->getArgOperand(4)};
    NewCall = Builder.CreateCall(NewFn, Args);
    AttributeList OldAttrs = CI->getAttributes();
    AttributeList NewAttrs = AttributeList::get(
        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
    NewCall->setAttributes(NewAttrs);
    auto *MemCI = cast<MemIntrinsic>(NewCall);
    // All mem intrinsics support dest alignment.
    const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
    MemCI->setDestAlignment(Align->getMaybeAlignValue());
    // Memcpy/Memmove also support source alignment.
    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
      MTI->setSourceAlignment(Align->getMaybeAlignValue());
    break;
  }
  }
  assert(NewCall && "Should have either set this variable or returned through "
                    "the default case");
  NewCall->takeName(CI);
  CI->replaceAllUsesWith(NewCall);
  CI->eraseFromParent();
}

void llvm::UpgradeCallsToIntrinsic(Function *F) {
  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");

  // Check if this function should be upgraded and get the replacement function
  // if there is one.
  Function *NewFn;
  if (UpgradeIntrinsicFunction(F, NewFn)) {
    // Replace all users of the old function with the new function or new
    // instructions. 
This is not a range loop because the call is deleted.
    for (User *U : make_early_inc_range(F->users()))
      if (CallBase *CB = dyn_cast<CallBase>(U))
        UpgradeIntrinsicCall(CB, NewFn);

    // Remove old function, no longer used, from the module.
    F->eraseFromParent();
  }
}

// Upgrade a scalar-format TBAA tag to the struct-path format. Tags already in
// struct-path form (first operand is an MDNode and >= 3 operands) are returned
// unchanged.
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
  const unsigned NumOperands = MD.getNumOperands();
  if (NumOperands == 0)
    return &MD; // Invalid, punt to a verifier error.

  // Check if the tag uses struct-path aware TBAA format.
  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
    return &MD;

  auto &Context = MD.getContext();
  if (NumOperands == 3) {
    // Scalar tag with a third (is-constant) operand: wrap the first two
    // operands into a scalar type node, then build the access tag.
    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
    MDNode *ScalarType = MDNode::get(Context, Elts);
    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
    Metadata *Elts2[] = {ScalarType, ScalarType,
                         ConstantAsMetadata::get(
                             Constant::getNullValue(Type::getInt64Ty(Context))),
                         MD.getOperand(2)};
    return MDNode::get(Context, Elts2);
  }
  // Create a MDNode <MD, MD, offset 0>
  Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
                                    Type::getInt64Ty(Context)))};
  return MDNode::get(Context, Elts);
}

// Upgrade an illegal cross-address-space bitcast of a value into a
// ptrtoint+inttoptr pair. Returns the final inttoptr (with the intermediate
// ptrtoint handed back through \p Temp so the caller can insert it too), or
// nullptr when no upgrade applies.
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
                                      Instruction *&Temp) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Temp = nullptr;
  Type *SrcTy = V->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = V->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);
    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);

    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
  }

  return nullptr;
}

// Constant-expression counterpart of UpgradeBitCastInst: rewrite a
// cross-address-space bitcast constant as inttoptr(ptrtoint C).
Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

    return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
                                     DestTy);
  }

  return nullptr;
}

/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
  if (DisableAutoUpgradeDebugInfo)
    return false;

  unsigned Version = getDebugMetadataVersionFromModule(M);
  if (Version == DEBUG_METADATA_VERSION) {
    // Current version: verify; a broken module is fatal, merely-broken debug
    // info is diagnosed and stripped below.
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
      M.getContext().diagnose(Diag);
    }
  }
  bool Modified = StripDebugInfo(M);
  if (Modified && Version != DEBUG_METADATA_VERSION) {
    // Diagnose a version mismatch.
    DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}

/// This checks for objc retain release marker which should be upgraded. 
It
/// returns true if module is modified.
/// Converts the old named-metadata form of the ARC marker (with a '#'
/// separated value) into a module flag (with a ';' separator).
static bool upgradeRetainReleaseMarker(Module &M) {
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        // Old encoding used '#' between the two halves; rebuild with ';'.
        SmallVector<StringRef, 4> ValueComp;
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        // Move the (possibly rewritten) marker string to a module flag and
        // drop the legacy named metadata.
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}

void llvm::UpgradeARCRuntime(Module &M) {
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);

    // Rewrite each direct call to OldFunc; other uses (e.g. address taken)
    // are left alone.
    for (User *U : make_early_inc_range(Fn->users())) {
      CallInst *CI = dyn_cast<CallInst>(U);
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();
      SmallVector<Value *, 2> Args;

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
          // to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the now-unused runtime function declaration.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
  if (!upgradeRetainReleaseMarker(M))
    return;

  // Mapping from legacy ObjC runtime function names to their intrinsics.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", 
llvm::Intrinsic::objc_sync_exit},5045{"objc_arc_annotation_topdown_bbstart",5046llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},5047{"objc_arc_annotation_topdown_bbend",5048llvm::Intrinsic::objc_arc_annotation_topdown_bbend},5049{"objc_arc_annotation_bottomup_bbstart",5050llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},5051{"objc_arc_annotation_bottomup_bbend",5052llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};50535054for (auto &I : RuntimeFuncs)5055UpgradeToIntrinsic(I.first, I.second);5056}50575058bool llvm::UpgradeModuleFlags(Module &M) {5059NamedMDNode *ModFlags = M.getModuleFlagsMetadata();5060if (!ModFlags)5061return false;50625063bool HasObjCFlag = false, HasClassProperties = false, Changed = false;5064bool HasSwiftVersionFlag = false;5065uint8_t SwiftMajorVersion, SwiftMinorVersion;5066uint32_t SwiftABIVersion;5067auto Int8Ty = Type::getInt8Ty(M.getContext());5068auto Int32Ty = Type::getInt32Ty(M.getContext());50695070for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {5071MDNode *Op = ModFlags->getOperand(I);5072if (Op->getNumOperands() != 3)5073continue;5074MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));5075if (!ID)5076continue;5077auto SetBehavior = [&](Module::ModFlagBehavior B) {5078Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(5079Type::getInt32Ty(M.getContext()), B)),5080MDString::get(M.getContext(), ID->getString()),5081Op->getOperand(2)};5082ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));5083Changed = true;5084};50855086if (ID->getString() == "Objective-C Image Info Version")5087HasObjCFlag = true;5088if (ID->getString() == "Objective-C Class Properties")5089HasClassProperties = true;5090// Upgrade PIC from Error/Max to Min.5091if (ID->getString() == "PIC Level") {5092if (auto *Behavior =5093mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {5094uint64_t V = Behavior->getLimitedValue();5095if (V == Module::Error || V == 
Module::Max)5096SetBehavior(Module::Min);5097}5098}5099// Upgrade "PIE Level" from Error to Max.5100if (ID->getString() == "PIE Level")5101if (auto *Behavior =5102mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))5103if (Behavior->getLimitedValue() == Module::Error)5104SetBehavior(Module::Max);51055106// Upgrade branch protection and return address signing module flags. The5107// module flag behavior for these fields were Error and now they are Min.5108if (ID->getString() == "branch-target-enforcement" ||5109ID->getString().starts_with("sign-return-address")) {5110if (auto *Behavior =5111mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {5112if (Behavior->getLimitedValue() == Module::Error) {5113Type *Int32Ty = Type::getInt32Ty(M.getContext());5114Metadata *Ops[3] = {5115ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),5116Op->getOperand(1), Op->getOperand(2)};5117ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));5118Changed = true;5119}5120}5121}51225123// Upgrade Objective-C Image Info Section. 
Removed the whitespce in the5124// section name so that llvm-lto will not complain about mismatching5125// module flags that is functionally the same.5126if (ID->getString() == "Objective-C Image Info Section") {5127if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {5128SmallVector<StringRef, 4> ValueComp;5129Value->getString().split(ValueComp, " ");5130if (ValueComp.size() != 1) {5131std::string NewValue;5132for (auto &S : ValueComp)5133NewValue += S.str();5134Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),5135MDString::get(M.getContext(), NewValue)};5136ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));5137Changed = true;5138}5139}5140}51415142// IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.5143// If the higher bits are set, it adds new module flag for swift info.5144if (ID->getString() == "Objective-C Garbage Collection") {5145auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));5146if (Md) {5147assert(Md->getValue() && "Expected non-empty metadata");5148auto Type = Md->getValue()->getType();5149if (Type == Int8Ty)5150continue;5151unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();5152if ((Val & 0xff) != Val) {5153HasSwiftVersionFlag = true;5154SwiftABIVersion = (Val & 0xff00) >> 8;5155SwiftMajorVersion = (Val & 0xff000000) >> 24;5156SwiftMinorVersion = (Val & 0xff0000) >> 16;5157}5158Metadata *Ops[3] = {5159ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),5160Op->getOperand(1),5161ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};5162ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));5163Changed = true;5164}5165}51665167if (ID->getString() == "amdgpu_code_object_version") {5168Metadata *Ops[3] = {5169Op->getOperand(0),5170MDString::get(M.getContext(), "amdhsa_code_object_version"),5171Op->getOperand(2)};5172ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));5173Changed = true;5174}5175}51765177// "Objective-C Class Properties" is recently 
added for Objective-C. We5178// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module5179// flag of value 0, so we can correclty downgrade this flag when trying to5180// link an ObjC bitcode without this module flag with an ObjC bitcode with5181// this module flag.5182if (HasObjCFlag && !HasClassProperties) {5183M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",5184(uint32_t)0);5185Changed = true;5186}51875188if (HasSwiftVersionFlag) {5189M.addModuleFlag(Module::Error, "Swift ABI Version",5190SwiftABIVersion);5191M.addModuleFlag(Module::Error, "Swift Major Version",5192ConstantInt::get(Int8Ty, SwiftMajorVersion));5193M.addModuleFlag(Module::Error, "Swift Minor Version",5194ConstantInt::get(Int8Ty, SwiftMinorVersion));5195Changed = true;5196}51975198return Changed;5199}52005201void llvm::UpgradeSectionAttributes(Module &M) {5202auto TrimSpaces = [](StringRef Section) -> std::string {5203SmallVector<StringRef, 5> Components;5204Section.split(Components, ',');52055206SmallString<32> Buffer;5207raw_svector_ostream OS(Buffer);52085209for (auto Component : Components)5210OS << ',' << Component.trim();52115212return std::string(OS.str().substr(1));5213};52145215for (auto &GV : M.globals()) {5216if (!GV.hasSection())5217continue;52185219StringRef Section = GV.getSection();52205221if (!Section.starts_with("__DATA, __objc_catlist"))5222continue;52235224// __DATA, __objc_catlist, regular, no_dead_strip5225// __DATA,__objc_catlist,regular,no_dead_strip5226GV.setSection(TrimSpaces(Section));5227}5228}52295230namespace {5231// Prior to LLVM 10.0, the strictfp attribute could be used on individual5232// callsites within a function that did not also have the strictfp attribute.5233// Since 10.0, if strict FP semantics are needed within a function, the5234// function must have the strictfp attribute and all calls within the function5235// must also have the strictfp attribute. 
This latter restriction is5236// necessary to prevent unwanted libcall simplification when a function is5237// being cloned (such as for inlining).5238//5239// The "dangling" strictfp attribute usage was only used to prevent constant5240// folding and other libcall simplification. The nobuiltin attribute on the5241// callsite has the same effect.5242struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {5243StrictFPUpgradeVisitor() = default;52445245void visitCallBase(CallBase &Call) {5246if (!Call.isStrictFP())5247return;5248if (isa<ConstrainedFPIntrinsic>(&Call))5249return;5250// If we get here, the caller doesn't have the strictfp attribute5251// but this callsite does. Replace the strictfp attribute with nobuiltin.5252Call.removeFnAttr(Attribute::StrictFP);5253Call.addFnAttr(Attribute::NoBuiltin);5254}5255};5256} // namespace52575258void llvm::UpgradeFunctionAttributes(Function &F) {5259// If a function definition doesn't have the strictfp attribute,5260// convert any callsite strictfp attributes to nobuiltin.5261if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {5262StrictFPUpgradeVisitor SFPV;5263SFPV.visit(F);5264}52655266// Remove all incompatibile attributes from function.5267F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));5268for (auto &Arg : F.args())5269Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));52705271// Older versions of LLVM treated an "implicit-section-name" attribute5272// similarly to directly setting the section on a Function.5273if (Attribute A = F.getFnAttribute("implicit-section-name");5274A.isValid() && A.isStringAttribute()) {5275F.setSection(A.getValueAsString());5276F.removeFnAttr("implicit-section-name");5277}5278}52795280static bool isOldLoopArgument(Metadata *MD) {5281auto *T = dyn_cast_or_null<MDTuple>(MD);5282if (!T)5283return false;5284if (T->getNumOperands() < 1)5285return false;5286auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));5287if 
(!S)5288return false;5289return S->getString().starts_with("llvm.vectorizer.");5290}52915292static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {5293StringRef OldPrefix = "llvm.vectorizer.";5294assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");52955296if (OldTag == "llvm.vectorizer.unroll")5297return MDString::get(C, "llvm.loop.interleave.count");52985299return MDString::get(5300C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))5301.str());5302}53035304static Metadata *upgradeLoopArgument(Metadata *MD) {5305auto *T = dyn_cast_or_null<MDTuple>(MD);5306if (!T)5307return MD;5308if (T->getNumOperands() < 1)5309return MD;5310auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));5311if (!OldTag)5312return MD;5313if (!OldTag->getString().starts_with("llvm.vectorizer."))5314return MD;53155316// This has an old tag. Upgrade it.5317SmallVector<Metadata *, 8> Ops;5318Ops.reserve(T->getNumOperands());5319Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));5320for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)5321Ops.push_back(T->getOperand(I));53225323return MDTuple::get(T->getContext(), Ops);5324}53255326MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {5327auto *T = dyn_cast<MDTuple>(&N);5328if (!T)5329return &N;53305331if (none_of(T->operands(), isOldLoopArgument))5332return &N;53335334SmallVector<Metadata *, 8> Ops;5335Ops.reserve(T->getNumOperands());5336for (Metadata *MD : T->operands())5337Ops.push_back(upgradeLoopArgument(MD));53385339return MDTuple::get(T->getContext(), Ops);5340}53415342std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {5343Triple T(TT);5344// The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting5345// the address space of globals to 1. 
This does not apply to SPIRV Logical.5346if (((T.isAMDGPU() && !T.isAMDGCN()) ||5347(T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) &&5348!DL.contains("-G") && !DL.starts_with("G")) {5349return DL.empty() ? std::string("G1") : (DL + "-G1").str();5350}53515352if (T.isLoongArch64() || T.isRISCV64()) {5353// Make i32 a native type for 64-bit LoongArch and RISC-V.5354auto I = DL.find("-n64-");5355if (I != StringRef::npos)5356return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();5357return DL.str();5358}53595360std::string Res = DL.str();5361// AMDGCN data layout upgrades.5362if (T.isAMDGCN()) {5363// Define address spaces for constants.5364if (!DL.contains("-G") && !DL.starts_with("G"))5365Res.append(Res.empty() ? "G1" : "-G1");53665367// Add missing non-integral declarations.5368// This goes before adding new address spaces to prevent incoherent string5369// values.5370if (!DL.contains("-ni") && !DL.starts_with("ni"))5371Res.append("-ni:7:8:9");5372// Update ni:7 to ni:7:8:9.5373if (DL.ends_with("ni:7"))5374Res.append(":8:9");5375if (DL.ends_with("ni:7:8"))5376Res.append(":9");53775378// Add sizing for address spaces 7 and 8 (fat raw buffers and buffer5379// resources) An empty data layout has already been upgraded to G1 by now.5380if (!DL.contains("-p7") && !DL.starts_with("p7"))5381Res.append("-p7:160:256:256:32");5382if (!DL.contains("-p8") && !DL.starts_with("p8"))5383Res.append("-p8:128:128");5384if (!DL.contains("-p9") && !DL.starts_with("p9"))5385Res.append("-p9:192:256:256:32");53865387return Res;5388}53895390// AArch64 data layout upgrades.5391if (T.isAArch64()) {5392// Add "-Fn32"5393if (!DL.empty() && !DL.contains("-Fn32"))5394Res.append("-Fn32");5395return Res;5396}53975398if (!T.isX86())5399return Res;54005401// If the datalayout matches the expected format, add pointer size address5402// spaces to the datalayout.5403std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";5404if (StringRef Ref = Res; !Ref.contains(AddrSpaces)) 
{5405SmallVector<StringRef, 4> Groups;5406Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");5407if (R.match(Res, &Groups))5408Res = (Groups[1] + AddrSpaces + Groups[3]).str();5409}54105411// i128 values need to be 16-byte-aligned. LLVM already called into libgcc5412// for i128 operations prior to this being reflected in the data layout, and5413// clang mostly produced LLVM IR that already aligned i128 to 16 byte5414// boundaries, so although this is a breaking change, the upgrade is expected5415// to fix more IR than it breaks.5416// Intel MCU is an exception and uses 4-byte-alignment.5417if (!T.isOSIAMCU()) {5418std::string I128 = "-i128:128";5419if (StringRef Ref = Res; !Ref.contains(I128)) {5420SmallVector<StringRef, 4> Groups;5421Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");5422if (R.match(Res, &Groups))5423Res = (Groups[1] + I128 + Groups[3]).str();5424}5425}54265427// For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.5428// Raising the alignment is safe because Clang did not produce f80 values in5429// the MSVC environment before this upgrade was added.5430if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {5431StringRef Ref = Res;5432auto I = Ref.find("-f80:32-");5433if (I != StringRef::npos)5434Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();5435}54365437return Res;5438}54395440void llvm::UpgradeAttributes(AttrBuilder &B) {5441StringRef FramePointer;5442Attribute A = B.getAttribute("no-frame-pointer-elim");5443if (A.isValid()) {5444// The value can be "true" or "false".5445FramePointer = A.getValueAsString() == "true" ? "all" : "none";5446B.removeAttribute("no-frame-pointer-elim");5447}5448if (B.contains("no-frame-pointer-elim-non-leaf")) {5449// The value is ignored. 
"no-frame-pointer-elim"="true" takes priority.5450if (FramePointer != "all")5451FramePointer = "non-leaf";5452B.removeAttribute("no-frame-pointer-elim-non-leaf");5453}5454if (!FramePointer.empty())5455B.addAttribute("frame-pointer", FramePointer);54565457A = B.getAttribute("null-pointer-is-valid");5458if (A.isValid()) {5459// The value can be "true" or "false".5460bool NullPointerIsValid = A.getValueAsString() == "true";5461B.removeAttribute("null-pointer-is-valid");5462if (NullPointerIsValid)5463B.addAttribute(Attribute::NullPointerIsValid);5464}5465}54665467void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {5468// clang.arc.attachedcall bundles are now required to have an operand.5469// If they don't, it's okay to drop them entirely: when there is an operand,5470// the "attachedcall" is meaningful and required, but without an operand,5471// it's just a marker NOP. Dropping it merely prevents an optimization.5472erase_if(Bundles, [&](OperandBundleDef &OBD) {5473return OBD.getTag() == "clang.arc.attachedcall" &&5474OBD.inputs().empty();5475});5476}547754785479