// Source: GitHub repository freebsd/freebsd-src
// Path: blob/main/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp
//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass does some optimizations for *W instructions at the MI level.
//
// First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
// because the sign extended bits aren't consumed or because the input was
// already sign extended by an earlier instruction.
//
// Then:
// 1. Unless explicitly disabled or the target prefers instructions with W
//    suffix, it removes the -w suffix from opw instructions whenever all users
//    are dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * addi.w because it helps reduce test differences between LA32 and LA64
//      w/o being a pessimization.
//
// 2. Or if explicitly enabled or the target prefers instructions with W
//    suffix, it adds the W suffix to the instruction whenever all users are
//    dependent only on the lower word of the result of the instruction.
//    The cases handled are:
//    * add.d/addi.d/sub.d/mul.d.
//    * slli.d with imm < 32.
//    * ld.d/ld.wu.
//===---------------------------------------------------------------------===//
31
32
#include "LoongArch.h"
33
#include "LoongArchMachineFunctionInfo.h"
34
#include "LoongArchSubtarget.h"
35
#include "llvm/ADT/SmallSet.h"
36
#include "llvm/ADT/Statistic.h"
37
#include "llvm/CodeGen/MachineFunctionPass.h"
38
#include "llvm/CodeGen/TargetInstrInfo.h"
39
40
using namespace llvm;
41
42
#define DEBUG_TYPE "loongarch-opt-w-instrs"
43
#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
44
45
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
46
STATISTIC(NumTransformedToWInstrs,
47
"Number of instructions transformed to W-ops");
48
49
static cl::opt<bool>
50
DisableSExtWRemoval("loongarch-disable-sextw-removal",
51
cl::desc("Disable removal of sign-extend insn"),
52
cl::init(false), cl::Hidden);
53
static cl::opt<bool>
54
DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
55
cl::desc("Disable convert to D suffix"),
56
cl::init(false), cl::Hidden);
57
58
namespace {
59
60
class LoongArchOptWInstrs : public MachineFunctionPass {
61
public:
62
static char ID;
63
64
LoongArchOptWInstrs() : MachineFunctionPass(ID) {}
65
66
bool runOnMachineFunction(MachineFunction &MF) override;
67
bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
68
const LoongArchSubtarget &ST,
69
MachineRegisterInfo &MRI);
70
bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
71
const LoongArchSubtarget &ST,
72
MachineRegisterInfo &MRI);
73
bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
74
const LoongArchSubtarget &ST,
75
MachineRegisterInfo &MRI);
76
77
void getAnalysisUsage(AnalysisUsage &AU) const override {
78
AU.setPreservesCFG();
79
MachineFunctionPass::getAnalysisUsage(AU);
80
}
81
82
StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
83
};
84
85
} // end anonymous namespace
86
87
char LoongArchOptWInstrs::ID = 0;
88
INITIALIZE_PASS(LoongArchOptWInstrs, DEBUG_TYPE, LOONGARCH_OPT_W_INSTRS_NAME,
89
false, false)
90
91
/// Factory for the pass: returns a newly heap-allocated LoongArchOptWInstrs.
FunctionPass *llvm::createLoongArchOptWInstrsPass() {
  FunctionPass *Pass = new LoongArchOptWInstrs();
  return Pass;
}
94
95
// Checks if all users only demand the lower \p OrigBits of the original
96
// instruction's result.
97
// TODO: handle multiple interdependent transformations
98
static bool hasAllNBitUsers(const MachineInstr &OrigMI,
99
const LoongArchSubtarget &ST,
100
const MachineRegisterInfo &MRI, unsigned OrigBits) {
101
102
SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
103
SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
104
105
Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
106
107
while (!Worklist.empty()) {
108
auto P = Worklist.pop_back_val();
109
const MachineInstr *MI = P.first;
110
unsigned Bits = P.second;
111
112
if (!Visited.insert(P).second)
113
continue;
114
115
// Only handle instructions with one def.
116
if (MI->getNumExplicitDefs() != 1)
117
return false;
118
119
Register DestReg = MI->getOperand(0).getReg();
120
if (!DestReg.isVirtual())
121
return false;
122
123
for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
124
const MachineInstr *UserMI = UserOp.getParent();
125
unsigned OpIdx = UserOp.getOperandNo();
126
127
switch (UserMI->getOpcode()) {
128
default:
129
// TODO: Add vector
130
return false;
131
132
case LoongArch::ADD_W:
133
case LoongArch::ADDI_W:
134
case LoongArch::SUB_W:
135
case LoongArch::ALSL_W:
136
case LoongArch::ALSL_WU:
137
case LoongArch::MUL_W:
138
case LoongArch::MULH_W:
139
case LoongArch::MULH_WU:
140
case LoongArch::MULW_D_W:
141
case LoongArch::MULW_D_WU:
142
// TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.
143
// case LoongArch::DIV_W:
144
// case LoongArch::DIV_WU:
145
// case LoongArch::MOD_W:
146
// case LoongArch::MOD_WU:
147
case LoongArch::SLL_W:
148
case LoongArch::SLLI_W:
149
case LoongArch::SRL_W:
150
case LoongArch::SRLI_W:
151
case LoongArch::SRA_W:
152
case LoongArch::SRAI_W:
153
case LoongArch::ROTR_W:
154
case LoongArch::ROTRI_W:
155
case LoongArch::CLO_W:
156
case LoongArch::CLZ_W:
157
case LoongArch::CTO_W:
158
case LoongArch::CTZ_W:
159
case LoongArch::BYTEPICK_W:
160
case LoongArch::REVB_2H:
161
case LoongArch::BITREV_4B:
162
case LoongArch::BITREV_W:
163
case LoongArch::BSTRINS_W:
164
case LoongArch::BSTRPICK_W:
165
case LoongArch::CRC_W_W_W:
166
case LoongArch::CRCC_W_W_W:
167
case LoongArch::MOVGR2FCSR:
168
case LoongArch::MOVGR2FRH_W:
169
case LoongArch::MOVGR2FR_W_64:
170
if (Bits >= 32)
171
break;
172
return false;
173
case LoongArch::MOVGR2CF:
174
if (Bits >= 1)
175
break;
176
return false;
177
case LoongArch::EXT_W_B:
178
if (Bits >= 8)
179
break;
180
return false;
181
case LoongArch::EXT_W_H:
182
if (Bits >= 16)
183
break;
184
return false;
185
186
case LoongArch::SRLI_D: {
187
// If we are shifting right by less than Bits, and users don't demand
188
// any bits that were shifted into [Bits-1:0], then we can consider this
189
// as an N-Bit user.
190
unsigned ShAmt = UserMI->getOperand(2).getImm();
191
if (Bits > ShAmt) {
192
Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
193
break;
194
}
195
return false;
196
}
197
198
// these overwrite higher input bits, otherwise the lower word of output
199
// depends only on the lower word of input. So check their uses read W.
200
case LoongArch::SLLI_D:
201
if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm()))
202
break;
203
Worklist.push_back(std::make_pair(UserMI, Bits));
204
break;
205
case LoongArch::ANDI: {
206
uint64_t Imm = UserMI->getOperand(2).getImm();
207
if (Bits >= (unsigned)llvm::bit_width(Imm))
208
break;
209
Worklist.push_back(std::make_pair(UserMI, Bits));
210
break;
211
}
212
case LoongArch::ORI: {
213
uint64_t Imm = UserMI->getOperand(2).getImm();
214
if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
215
break;
216
Worklist.push_back(std::make_pair(UserMI, Bits));
217
break;
218
}
219
220
case LoongArch::SLL_D:
221
// Operand 2 is the shift amount which uses log2(grlen) bits.
222
if (OpIdx == 2) {
223
if (Bits >= Log2_32(ST.getGRLen()))
224
break;
225
return false;
226
}
227
Worklist.push_back(std::make_pair(UserMI, Bits));
228
break;
229
230
case LoongArch::SRA_D:
231
case LoongArch::SRL_D:
232
case LoongArch::ROTR_D:
233
// Operand 2 is the shift amount which uses 6 bits.
234
if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen()))
235
break;
236
return false;
237
238
case LoongArch::ST_B:
239
case LoongArch::STX_B:
240
case LoongArch::STGT_B:
241
case LoongArch::STLE_B:
242
case LoongArch::IOCSRWR_B:
243
// The first argument is the value to store.
244
if (OpIdx == 0 && Bits >= 8)
245
break;
246
return false;
247
case LoongArch::ST_H:
248
case LoongArch::STX_H:
249
case LoongArch::STGT_H:
250
case LoongArch::STLE_H:
251
case LoongArch::IOCSRWR_H:
252
// The first argument is the value to store.
253
if (OpIdx == 0 && Bits >= 16)
254
break;
255
return false;
256
case LoongArch::ST_W:
257
case LoongArch::STX_W:
258
case LoongArch::SCREL_W:
259
case LoongArch::STPTR_W:
260
case LoongArch::STGT_W:
261
case LoongArch::STLE_W:
262
case LoongArch::IOCSRWR_W:
263
// The first argument is the value to store.
264
if (OpIdx == 0 && Bits >= 32)
265
break;
266
return false;
267
268
case LoongArch::CRC_W_B_W:
269
case LoongArch::CRCC_W_B_W:
270
if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
271
break;
272
return false;
273
case LoongArch::CRC_W_H_W:
274
case LoongArch::CRCC_W_H_W:
275
if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
276
break;
277
return false;
278
case LoongArch::CRC_W_D_W:
279
case LoongArch::CRCC_W_D_W:
280
if (OpIdx == 2 && Bits >= 32)
281
break;
282
return false;
283
284
// For these, lower word of output in these operations, depends only on
285
// the lower word of input. So, we check all uses only read lower word.
286
case LoongArch::COPY:
287
case LoongArch::PHI:
288
case LoongArch::ADD_D:
289
case LoongArch::ADDI_D:
290
case LoongArch::SUB_D:
291
case LoongArch::MUL_D:
292
case LoongArch::AND:
293
case LoongArch::OR:
294
case LoongArch::NOR:
295
case LoongArch::XOR:
296
case LoongArch::XORI:
297
case LoongArch::ANDN:
298
case LoongArch::ORN:
299
Worklist.push_back(std::make_pair(UserMI, Bits));
300
break;
301
302
case LoongArch::MASKNEZ:
303
case LoongArch::MASKEQZ:
304
if (OpIdx != 1)
305
return false;
306
Worklist.push_back(std::make_pair(UserMI, Bits));
307
break;
308
}
309
}
310
}
311
312
return true;
313
}
314
315
static bool hasAllWUsers(const MachineInstr &OrigMI,
316
const LoongArchSubtarget &ST,
317
const MachineRegisterInfo &MRI) {
318
return hasAllNBitUsers(OrigMI, ST, MRI, 32);
319
}
320
321
// This function returns true if the machine instruction always outputs a value
322
// where bits 63:32 match bit 31.
323
static bool isSignExtendingOpW(const MachineInstr &MI,
324
const MachineRegisterInfo &MRI, unsigned OpNo) {
325
switch (MI.getOpcode()) {
326
// Normal cases
327
case LoongArch::ADD_W:
328
case LoongArch::SUB_W:
329
case LoongArch::ADDI_W:
330
case LoongArch::ALSL_W:
331
case LoongArch::LU12I_W:
332
case LoongArch::SLT:
333
case LoongArch::SLTU:
334
case LoongArch::SLTI:
335
case LoongArch::SLTUI:
336
case LoongArch::ANDI:
337
case LoongArch::MUL_W:
338
case LoongArch::MULH_W:
339
case LoongArch::MULH_WU:
340
case LoongArch::DIV_W:
341
case LoongArch::MOD_W:
342
case LoongArch::DIV_WU:
343
case LoongArch::MOD_WU:
344
case LoongArch::SLL_W:
345
case LoongArch::SRL_W:
346
case LoongArch::SRA_W:
347
case LoongArch::ROTR_W:
348
case LoongArch::SLLI_W:
349
case LoongArch::SRLI_W:
350
case LoongArch::SRAI_W:
351
case LoongArch::ROTRI_W:
352
case LoongArch::EXT_W_B:
353
case LoongArch::EXT_W_H:
354
case LoongArch::CLO_W:
355
case LoongArch::CLZ_W:
356
case LoongArch::CTO_W:
357
case LoongArch::CTZ_W:
358
case LoongArch::BYTEPICK_W:
359
case LoongArch::REVB_2H:
360
case LoongArch::BITREV_4B:
361
case LoongArch::BITREV_W:
362
case LoongArch::BSTRINS_W:
363
case LoongArch::BSTRPICK_W:
364
case LoongArch::LD_B:
365
case LoongArch::LD_H:
366
case LoongArch::LD_W:
367
case LoongArch::LD_BU:
368
case LoongArch::LD_HU:
369
case LoongArch::LL_W:
370
case LoongArch::LLACQ_W:
371
case LoongArch::RDTIMEL_W:
372
case LoongArch::RDTIMEH_W:
373
case LoongArch::CPUCFG:
374
case LoongArch::LDX_B:
375
case LoongArch::LDX_H:
376
case LoongArch::LDX_W:
377
case LoongArch::LDX_BU:
378
case LoongArch::LDX_HU:
379
case LoongArch::LDPTR_W:
380
case LoongArch::LDGT_B:
381
case LoongArch::LDGT_H:
382
case LoongArch::LDGT_W:
383
case LoongArch::LDLE_B:
384
case LoongArch::LDLE_H:
385
case LoongArch::LDLE_W:
386
case LoongArch::AMSWAP_B:
387
case LoongArch::AMSWAP_H:
388
case LoongArch::AMSWAP_W:
389
case LoongArch::AMADD_B:
390
case LoongArch::AMADD_H:
391
case LoongArch::AMADD_W:
392
case LoongArch::AMAND_W:
393
case LoongArch::AMOR_W:
394
case LoongArch::AMXOR_W:
395
case LoongArch::AMMAX_W:
396
case LoongArch::AMMIN_W:
397
case LoongArch::AMMAX_WU:
398
case LoongArch::AMMIN_WU:
399
case LoongArch::AMSWAP__DB_B:
400
case LoongArch::AMSWAP__DB_H:
401
case LoongArch::AMSWAP__DB_W:
402
case LoongArch::AMADD__DB_B:
403
case LoongArch::AMADD__DB_H:
404
case LoongArch::AMADD__DB_W:
405
case LoongArch::AMAND__DB_W:
406
case LoongArch::AMOR__DB_W:
407
case LoongArch::AMXOR__DB_W:
408
case LoongArch::AMMAX__DB_W:
409
case LoongArch::AMMIN__DB_W:
410
case LoongArch::AMMAX__DB_WU:
411
case LoongArch::AMMIN__DB_WU:
412
case LoongArch::AMCAS_B:
413
case LoongArch::AMCAS_H:
414
case LoongArch::AMCAS_W:
415
case LoongArch::AMCAS__DB_B:
416
case LoongArch::AMCAS__DB_H:
417
case LoongArch::AMCAS__DB_W:
418
case LoongArch::CRC_W_B_W:
419
case LoongArch::CRC_W_H_W:
420
case LoongArch::CRC_W_W_W:
421
case LoongArch::CRC_W_D_W:
422
case LoongArch::CRCC_W_B_W:
423
case LoongArch::CRCC_W_H_W:
424
case LoongArch::CRCC_W_W_W:
425
case LoongArch::CRCC_W_D_W:
426
case LoongArch::IOCSRRD_B:
427
case LoongArch::IOCSRRD_H:
428
case LoongArch::IOCSRRD_W:
429
case LoongArch::MOVFR2GR_S:
430
case LoongArch::MOVFCSR2GR:
431
case LoongArch::MOVCF2GR:
432
case LoongArch::MOVFRH2GR_S:
433
case LoongArch::MOVFR2GR_S_64:
434
// TODO: Add vector
435
return true;
436
// Special cases that require checking operands.
437
// shifting right sufficiently makes the value 32-bit sign-extended
438
case LoongArch::SRAI_D:
439
return MI.getOperand(2).getImm() >= 32;
440
case LoongArch::SRLI_D:
441
return MI.getOperand(2).getImm() > 32;
442
// The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
443
case LoongArch::ADDI_D:
444
case LoongArch::ORI:
445
return MI.getOperand(1).isReg() &&
446
MI.getOperand(1).getReg() == LoongArch::R0;
447
// A bits extract is sign extended if the msb is less than 31.
448
case LoongArch::BSTRPICK_D:
449
return MI.getOperand(2).getImm() < 31;
450
// Copying from R0 produces zero.
451
case LoongArch::COPY:
452
return MI.getOperand(1).getReg() == LoongArch::R0;
453
// Ignore the scratch register destination.
454
case LoongArch::PseudoMaskedAtomicSwap32:
455
case LoongArch::PseudoAtomicSwap32:
456
case LoongArch::PseudoMaskedAtomicLoadAdd32:
457
case LoongArch::PseudoMaskedAtomicLoadSub32:
458
case LoongArch::PseudoAtomicLoadNand32:
459
case LoongArch::PseudoMaskedAtomicLoadNand32:
460
case LoongArch::PseudoAtomicLoadAdd32:
461
case LoongArch::PseudoAtomicLoadSub32:
462
case LoongArch::PseudoAtomicLoadAnd32:
463
case LoongArch::PseudoAtomicLoadOr32:
464
case LoongArch::PseudoAtomicLoadXor32:
465
case LoongArch::PseudoMaskedAtomicLoadUMax32:
466
case LoongArch::PseudoMaskedAtomicLoadUMin32:
467
case LoongArch::PseudoCmpXchg32:
468
case LoongArch::PseudoMaskedCmpXchg32:
469
case LoongArch::PseudoMaskedAtomicLoadMax32:
470
case LoongArch::PseudoMaskedAtomicLoadMin32:
471
return OpNo == 0;
472
}
473
474
return false;
475
}
476
477
static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
478
const MachineRegisterInfo &MRI,
479
SmallPtrSetImpl<MachineInstr *> &FixableDef) {
480
SmallSet<Register, 4> Visited;
481
SmallVector<Register, 4> Worklist;
482
483
auto AddRegToWorkList = [&](Register SrcReg) {
484
if (!SrcReg.isVirtual())
485
return false;
486
Worklist.push_back(SrcReg);
487
return true;
488
};
489
490
if (!AddRegToWorkList(SrcReg))
491
return false;
492
493
while (!Worklist.empty()) {
494
Register Reg = Worklist.pop_back_val();
495
496
// If we already visited this register, we don't need to check it again.
497
if (!Visited.insert(Reg).second)
498
continue;
499
500
MachineInstr *MI = MRI.getVRegDef(Reg);
501
if (!MI)
502
continue;
503
504
int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
505
assert(OpNo != -1 && "Couldn't find register");
506
507
// If this is a sign extending operation we don't need to look any further.
508
if (isSignExtendingOpW(*MI, MRI, OpNo))
509
continue;
510
511
// Is this an instruction that propagates sign extend?
512
switch (MI->getOpcode()) {
513
default:
514
// Unknown opcode, give up.
515
return false;
516
case LoongArch::COPY: {
517
const MachineFunction *MF = MI->getMF();
518
const LoongArchMachineFunctionInfo *LAFI =
519
MF->getInfo<LoongArchMachineFunctionInfo>();
520
521
// If this is the entry block and the register is livein, see if we know
522
// it is sign extended.
523
if (MI->getParent() == &MF->front()) {
524
Register VReg = MI->getOperand(0).getReg();
525
if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg))
526
continue;
527
}
528
529
Register CopySrcReg = MI->getOperand(1).getReg();
530
if (CopySrcReg == LoongArch::R4) {
531
// For a method return value, we check the ZExt/SExt flags in attribute.
532
// We assume the following code sequence for method call.
533
// PseudoCALL @bar, ...
534
// ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
535
// %0:gpr = COPY $r4
536
//
537
// We use the PseudoCall to look up the IR function being called to find
538
// its return attributes.
539
const MachineBasicBlock *MBB = MI->getParent();
540
auto II = MI->getIterator();
541
if (II == MBB->instr_begin() ||
542
(--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)
543
return false;
544
545
const MachineInstr &CallMI = *(--II);
546
if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
547
return false;
548
549
auto *CalleeFn =
550
dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
551
if (!CalleeFn)
552
return false;
553
554
auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
555
if (!IntTy)
556
return false;
557
558
const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
559
unsigned BitWidth = IntTy->getBitWidth();
560
if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
561
(BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
562
continue;
563
}
564
565
if (!AddRegToWorkList(CopySrcReg))
566
return false;
567
568
break;
569
}
570
571
// For these, we just need to check if the 1st operand is sign extended.
572
case LoongArch::MOD_D:
573
case LoongArch::ANDI:
574
case LoongArch::ORI:
575
case LoongArch::XORI:
576
// |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
577
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
578
// Logical operations use a sign extended 12-bit immediate.
579
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
580
return false;
581
582
break;
583
case LoongArch::MOD_DU:
584
case LoongArch::AND:
585
case LoongArch::OR:
586
case LoongArch::XOR:
587
case LoongArch::ANDN:
588
case LoongArch::ORN:
589
case LoongArch::PHI: {
590
// If all incoming values are sign-extended, the output of AND, OR, XOR,
591
// or PHI is also sign-extended.
592
593
// The input registers for PHI are operand 1, 3, ...
594
// The input registers for others are operand 1 and 2.
595
unsigned B = 1, E = 3, D = 1;
596
switch (MI->getOpcode()) {
597
case LoongArch::PHI:
598
E = MI->getNumOperands();
599
D = 2;
600
break;
601
}
602
603
for (unsigned I = B; I != E; I += D) {
604
if (!MI->getOperand(I).isReg())
605
return false;
606
607
if (!AddRegToWorkList(MI->getOperand(I).getReg()))
608
return false;
609
}
610
611
break;
612
}
613
614
case LoongArch::MASKEQZ:
615
case LoongArch::MASKNEZ:
616
// Instructions return zero or operand 1. Result is sign extended if
617
// operand 1 is sign extended.
618
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
619
return false;
620
break;
621
622
// With these opcode, we can "fix" them with the W-version
623
// if we know all users of the result only rely on bits 31:0
624
case LoongArch::SLLI_D:
625
// SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
626
if (MI->getOperand(2).getImm() >= 32)
627
return false;
628
[[fallthrough]];
629
case LoongArch::ADDI_D:
630
case LoongArch::ADD_D:
631
case LoongArch::LD_D:
632
case LoongArch::LD_WU:
633
case LoongArch::MUL_D:
634
case LoongArch::SUB_D:
635
if (hasAllWUsers(*MI, ST, MRI)) {
636
FixableDef.insert(MI);
637
break;
638
}
639
return false;
640
// If all incoming values are sign-extended and all users only use
641
// the lower 32 bits, then convert them to W versions.
642
case LoongArch::DIV_D: {
643
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
644
return false;
645
if (!AddRegToWorkList(MI->getOperand(2).getReg()))
646
return false;
647
if (hasAllWUsers(*MI, ST, MRI)) {
648
FixableDef.insert(MI);
649
break;
650
}
651
return false;
652
}
653
}
654
}
655
656
// If we get here, then every node we visited produces a sign extended value
657
// or propagated sign extended values. So the result must be sign extended.
658
return true;
659
}
660
661
static unsigned getWOp(unsigned Opcode) {
662
switch (Opcode) {
663
case LoongArch::ADDI_D:
664
return LoongArch::ADDI_W;
665
case LoongArch::ADD_D:
666
return LoongArch::ADD_W;
667
case LoongArch::DIV_D:
668
return LoongArch::DIV_W;
669
case LoongArch::LD_D:
670
case LoongArch::LD_WU:
671
return LoongArch::LD_W;
672
case LoongArch::MUL_D:
673
return LoongArch::MUL_W;
674
case LoongArch::SLLI_D:
675
return LoongArch::SLLI_W;
676
case LoongArch::SUB_D:
677
return LoongArch::SUB_W;
678
default:
679
llvm_unreachable("Unexpected opcode for replacement with W variant");
680
}
681
}
682
683
bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
684
const LoongArchInstrInfo &TII,
685
const LoongArchSubtarget &ST,
686
MachineRegisterInfo &MRI) {
687
if (DisableSExtWRemoval)
688
return false;
689
690
bool MadeChange = false;
691
for (MachineBasicBlock &MBB : MF) {
692
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
693
// We're looking for the sext.w pattern ADDI.W rd, rs, 0.
694
if (!LoongArch::isSEXT_W(MI))
695
continue;
696
697
Register SrcReg = MI.getOperand(1).getReg();
698
699
SmallPtrSet<MachineInstr *, 4> FixableDefs;
700
701
// If all users only use the lower bits, this sext.w is redundant.
702
// Or if all definitions reaching MI sign-extend their output,
703
// then sext.w is redundant.
704
if (!hasAllWUsers(MI, ST, MRI) &&
705
!isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
706
continue;
707
708
Register DstReg = MI.getOperand(0).getReg();
709
if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
710
continue;
711
712
// Convert Fixable instructions to their W versions.
713
for (MachineInstr *Fixable : FixableDefs) {
714
LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
715
Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
716
Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
717
Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
718
Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
719
LLVM_DEBUG(dbgs() << " with " << *Fixable);
720
++NumTransformedToWInstrs;
721
}
722
723
LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
724
MRI.replaceRegWith(DstReg, SrcReg);
725
MRI.clearKillFlags(SrcReg);
726
MI.eraseFromParent();
727
++NumRemovedSExtW;
728
MadeChange = true;
729
}
730
}
731
732
return MadeChange;
733
}
734
735
bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,
736
const LoongArchInstrInfo &TII,
737
const LoongArchSubtarget &ST,
738
MachineRegisterInfo &MRI) {
739
bool MadeChange = false;
740
for (MachineBasicBlock &MBB : MF) {
741
for (MachineInstr &MI : MBB) {
742
unsigned Opc;
743
switch (MI.getOpcode()) {
744
default:
745
continue;
746
case LoongArch::ADDI_W:
747
Opc = LoongArch::ADDI_D;
748
break;
749
}
750
751
if (hasAllWUsers(MI, ST, MRI)) {
752
MI.setDesc(TII.get(Opc));
753
MadeChange = true;
754
}
755
}
756
}
757
758
return MadeChange;
759
}
760
761
bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,
762
const LoongArchInstrInfo &TII,
763
const LoongArchSubtarget &ST,
764
MachineRegisterInfo &MRI) {
765
bool MadeChange = false;
766
for (MachineBasicBlock &MBB : MF) {
767
for (MachineInstr &MI : MBB) {
768
unsigned WOpc;
769
// TODO: Add more?
770
switch (MI.getOpcode()) {
771
default:
772
continue;
773
case LoongArch::ADD_D:
774
WOpc = LoongArch::ADD_W;
775
break;
776
case LoongArch::ADDI_D:
777
WOpc = LoongArch::ADDI_W;
778
break;
779
case LoongArch::SUB_D:
780
WOpc = LoongArch::SUB_W;
781
break;
782
case LoongArch::MUL_D:
783
WOpc = LoongArch::MUL_W;
784
break;
785
case LoongArch::SLLI_D:
786
// SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
787
if (MI.getOperand(2).getImm() >= 32)
788
continue;
789
WOpc = LoongArch::SLLI_W;
790
break;
791
case LoongArch::LD_D:
792
case LoongArch::LD_WU:
793
WOpc = LoongArch::LD_W;
794
break;
795
}
796
797
if (hasAllWUsers(MI, ST, MRI)) {
798
LLVM_DEBUG(dbgs() << "Replacing " << MI);
799
MI.setDesc(TII.get(WOpc));
800
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
801
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
802
MI.clearFlag(MachineInstr::MIFlag::IsExact);
803
LLVM_DEBUG(dbgs() << " with " << MI);
804
++NumTransformedToWInstrs;
805
MadeChange = true;
806
}
807
}
808
}
809
810
return MadeChange;
811
}
812
813
// Pass driver. Does nothing for skipped functions or non-64-bit subtargets.
// Otherwise removes redundant sext.w first, then runs the W->D conversion
// (unless disabled or the subtarget prefers W instructions) and the D->W
// conversion (only if the subtarget prefers W instructions).
bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const LoongArchSubtarget &ST = MF.getSubtarget<LoongArchSubtarget>();
  if (!ST.is64Bit())
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const LoongArchInstrInfo &TII = *ST.getInstrInfo();

  bool Changed = removeSExtWInstrs(MF, TII, ST, MRI);

  if (!DisableCvtToDSuffix && !ST.preferWInst())
    Changed |= convertToDSuffixes(MF, TII, ST, MRI);

  if (ST.preferWInst())
    Changed |= convertToWSuffixes(MF, TII, ST, MRI);

  return Changed;
}
835
836