Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
213799 views
1
//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Merge the offset of address calculation into the offset field
10
// of instructions in a global address lowering sequence.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "LoongArch.h"
15
#include "LoongArchTargetMachine.h"
16
#include "llvm/CodeGen/MachineFunctionPass.h"
17
#include "llvm/CodeGen/Passes.h"
18
#include "llvm/MC/TargetRegistry.h"
19
#include "llvm/Support/Debug.h"
20
#include "llvm/Target/TargetOptions.h"
21
#include <optional>
22
23
using namespace llvm;
24
25
#define DEBUG_TYPE "loongarch-merge-base-offset"
26
#define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
27
28
namespace {
29
30
class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
31
const LoongArchSubtarget *ST = nullptr;
32
MachineRegisterInfo *MRI;
33
34
public:
35
static char ID;
36
bool runOnMachineFunction(MachineFunction &Fn) override;
37
bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
38
MachineInstr *&Lo20, MachineInstr *&Hi12,
39
MachineInstr *&Last);
40
bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
41
MachineInstr *&Lo12);
42
43
bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
44
MachineInstr *&Lo20, MachineInstr *&Hi12,
45
MachineInstr *&Last);
46
void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
47
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
48
int64_t Offset);
49
bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
50
MachineInstr *&Lo20, MachineInstr *&Hi12,
51
MachineInstr *&Last, MachineInstr &TailAdd,
52
Register GAReg);
53
54
bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
55
MachineInstr *&Lo20, MachineInstr *&Hi12,
56
MachineInstr *&Last);
57
58
LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
59
60
MachineFunctionProperties getRequiredProperties() const override {
61
return MachineFunctionProperties().setIsSSA();
62
}
63
64
void getAnalysisUsage(AnalysisUsage &AU) const override {
65
AU.setPreservesCFG();
66
MachineFunctionPass::getAnalysisUsage(AU);
67
}
68
69
StringRef getPassName() const override {
70
return LoongArch_MERGE_BASE_OFFSET_NAME;
71
}
72
};
73
} // end anonymous namespace
74
75
char LoongArchMergeBaseOffsetOpt::ID = 0;
76
INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
77
LoongArch_MERGE_BASE_OFFSET_NAME, false, false)
78
79
// Detect either of the patterns:
80
//
81
// 1. (small/medium):
82
// pcalau12i vreg1, %pc_hi20(s)
83
// addi.d vreg2, vreg1, %pc_lo12(s)
84
//
85
// 2. (large):
86
// pcalau12i vreg1, %pc_hi20(s)
87
// addi.d vreg2, $zero, %pc_lo12(s)
88
// lu32i.d vreg3, vreg2, %pc64_lo20(s)
89
// lu52i.d vreg4, vreg3, %pc64_hi12(s)
90
// add.d vreg5, vreg4, vreg1
91
92
// The pattern is only accepted if:
93
// 1) For small and medium pattern, the first instruction has only one use,
94
// which is the ADDI.
95
// 2) For large pattern, the first four instructions each have only one use,
96
// and the user of the fourth instruction is ADD.
97
// 3) The address operands have the appropriate type, reflecting the
98
// lowering of a global address or constant pool using the pattern.
99
// 4) The offset value in the Global Address or Constant Pool is 0.
100
bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
101
MachineInstr *&Lo12,
102
MachineInstr *&Lo20,
103
MachineInstr *&Hi12,
104
MachineInstr *&Last) {
105
if (Hi20.getOpcode() != LoongArch::PCALAU12I)
106
return false;
107
108
const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
109
if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_PCREL_HI)
110
return false;
111
112
auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
113
return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
114
};
115
116
if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
117
return false;
118
119
Register HiDestReg = Hi20.getOperand(0).getReg();
120
if (!MRI->hasOneUse(HiDestReg))
121
return false;
122
123
MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
124
if (UseInst->getOpcode() != LoongArch::ADD_D) {
125
Lo12 = UseInst;
126
if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
127
(!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
128
return false;
129
} else {
130
assert(ST->is64Bit());
131
Last = UseInst;
132
133
Register LastOp1Reg = Last->getOperand(1).getReg();
134
if (!LastOp1Reg.isVirtual())
135
return false;
136
Hi12 = MRI->getVRegDef(LastOp1Reg);
137
const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
138
if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
139
return false;
140
if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
141
return false;
142
if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
143
return false;
144
145
Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
146
const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
147
if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
148
return false;
149
if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
150
return false;
151
if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
152
return false;
153
154
Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
155
if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
156
return false;
157
}
158
159
const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
160
assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
161
if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_PCREL_LO ||
162
!(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
163
Lo12Op2.getOffset() != 0)
164
return false;
165
166
if (Hi20Op1.isGlobal()) {
167
LLVM_DEBUG(dbgs() << " Found lowered global address: "
168
<< *Hi20Op1.getGlobal() << "\n");
169
} else if (Hi20Op1.isBlockAddress()) {
170
LLVM_DEBUG(dbgs() << " Found lowered basic address: "
171
<< *Hi20Op1.getBlockAddress() << "\n");
172
} else if (Hi20Op1.isCPI()) {
173
LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
174
<< "\n");
175
}
176
177
return true;
178
}
179
180
// Detect the pattern:
181
//
182
// (small/medium):
183
// lu12i.w vreg1, %le_hi20_r(s)
184
// add.w/d vreg2, vreg1, r2, %le_add_r(s)
185
// addi.w/d vreg3, vreg2, %le_lo12_r(s)
186
187
// The pattern is only accepted if:
188
// 1) The first instruction has only one use, which is the PseudoAddTPRel.
189
// The second instruction has only one use, which is the ADDI. The
190
// second instruction's last operand is the tp register.
191
// 2) The address operands have the appropriate type, reflecting the
192
// lowering of a thread_local global address using the pattern.
193
// 3) The offset value in the ThreadLocal Global Address is 0.
194
bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
195
MachineInstr *&Add,
196
MachineInstr *&Lo12) {
197
if (Hi20.getOpcode() != LoongArch::LU12I_W)
198
return false;
199
200
auto isGlobalOrCPI = [](const MachineOperand &Op) {
201
return Op.isGlobal() || Op.isCPI();
202
};
203
204
const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
205
if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_LE_HI_R ||
206
!isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
207
return false;
208
209
Register HiDestReg = Hi20.getOperand(0).getReg();
210
if (!MRI->hasOneUse(HiDestReg))
211
return false;
212
213
Add = &*MRI->use_instr_begin(HiDestReg);
214
if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
215
(!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
216
return false;
217
218
if (Add->getOperand(2).getReg() != LoongArch::R2)
219
return false;
220
221
const MachineOperand &AddOp3 = Add->getOperand(3);
222
if (LoongArchII::getDirectFlags(AddOp3) != LoongArchII::MO_LE_ADD_R ||
223
!(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
224
AddOp3.getOffset() != 0)
225
return false;
226
227
Register AddDestReg = Add->getOperand(0).getReg();
228
if (!MRI->hasOneUse(AddDestReg))
229
return false;
230
231
Lo12 = &*MRI->use_instr_begin(AddDestReg);
232
if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
233
(!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
234
return false;
235
236
const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
237
if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_LE_LO_R ||
238
!(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
239
Lo12Op2.getOffset() != 0)
240
return false;
241
242
if (Hi20Op1.isGlobal()) {
243
LLVM_DEBUG(dbgs() << " Found lowered global address: "
244
<< *Hi20Op1.getGlobal() << "\n");
245
} else if (Hi20Op1.isCPI()) {
246
LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
247
<< "\n");
248
}
249
250
return true;
251
}
252
253
// Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
254
// Delete the tail instruction and update all the uses to use the
255
// output from Last.
256
void LoongArchMergeBaseOffsetOpt::foldOffset(
257
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
258
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
259
int64_t Offset) {
260
// Put the offset back in Hi and the Lo
261
Hi20.getOperand(1).setOffset(Offset);
262
Lo12.getOperand(2).setOffset(Offset);
263
if (Lo20 && Hi12) {
264
Lo20->getOperand(2).setOffset(Offset);
265
Hi12->getOperand(2).setOffset(Offset);
266
}
267
268
// For tls-le, offset of the second PseudoAddTPRel instr should also be
269
// updated.
270
MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
271
if (Hi20.getOpcode() == LoongArch::LU12I_W)
272
Add->getOperand(3).setOffset(Offset);
273
274
// Delete the tail instruction.
275
MachineInstr *Def = Last ? Last : &Lo12;
276
MRI->constrainRegClass(Def->getOperand(0).getReg(),
277
MRI->getRegClass(Tail.getOperand(0).getReg()));
278
MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
279
Tail.eraseFromParent();
280
281
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
282
<< " " << Hi20;);
283
if (Hi20.getOpcode() == LoongArch::LU12I_W) {
284
LLVM_DEBUG(dbgs() << " " << *Add;);
285
}
286
LLVM_DEBUG(dbgs() << " " << Lo12;);
287
if (Lo20 && Hi12) {
288
LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
289
}
290
}
291
292
// Detect patterns for large offsets that are passed into an ADD instruction.
293
// If the pattern is found, updates the offset in Hi20, (Add), Lo12,
294
// (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
295
// produced the offset.
296
//
297
// (The instructions marked with "!" are not necessarily present)
298
//
299
// Base address lowering is of the form:
300
// 1) pcala:
301
// Hi20: pcalau12i vreg1, %pc_hi20(s)
302
// +--- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
303
// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
304
// +--- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
305
// |
306
// | 2) tls-le:
307
// | Hi20: lu12i.w vreg1, %le_hi20_r(s)
308
// | Add: add.w/d vreg1, vreg1, r2, %le_add_r(s)
309
// +--- Lo12: addi.w/d vreg2, vreg1, %le_lo12_r(s)
310
// |
311
// | The large offset can be one of the forms:
312
// |
313
// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
314
// | OffsetHi20: lu12i.w vreg3, 4
315
// | OffsetLo12: ori voff, vreg3, 188 ------------------+
316
// | |
317
// +-> 2) Offset that has non zero bits in Hi20 bits only: |
318
// | OffsetHi20: lu12i.w voff, 128 ------------------+
319
// | |
320
// +-> 3) Offset that has non zero bits in Lo20 bits: |
321
// | OffsetHi20: lu12i.w vreg3, 121 ! |
322
// | OffsetLo12: ori voff, vreg3, 122 ! |
323
// | OffsetLo20: lu32i.d voff, 123 ------------------+
324
// +-> 4) Offset that has non zero bits in Hi12 bits: |
325
// OffsetHi20: lu12i.w vreg3, 121 ! |
326
// OffsetLo12: ori voff, vreg3, 122 ! |
327
// OffsetLo20: lu32i.d vreg3, 123 ! |
328
// OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
329
// |
330
// TailAdd: add.d vreg4, vreg2, voff <------------------+
331
//
332
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
333
MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
334
MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
335
Register GAReg) {
336
assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
337
TailAdd.getOpcode() == LoongArch::ADD_D) &&
338
"Expected ADD instruction!");
339
Register Rs = TailAdd.getOperand(1).getReg();
340
Register Rt = TailAdd.getOperand(2).getReg();
341
Register Reg = Rs == GAReg ? Rt : Rs;
342
SmallVector<MachineInstr *, 4> Instrs;
343
int64_t Offset = 0;
344
int64_t Mask = -1;
345
346
// This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
347
for (int i = 0; i < 4; i++) {
348
// Handle Reg is R0.
349
if (Reg == LoongArch::R0)
350
break;
351
352
// Can't fold if the register has more than one use.
353
if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
354
return false;
355
356
MachineInstr *Curr = MRI->getVRegDef(Reg);
357
if (!Curr)
358
break;
359
360
switch (Curr->getOpcode()) {
361
default:
362
// Can't fold if the instruction opcode is unexpected.
363
return false;
364
case LoongArch::ORI: {
365
MachineOperand ImmOp = Curr->getOperand(2);
366
if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
367
return false;
368
Offset += ImmOp.getImm();
369
Reg = Curr->getOperand(1).getReg();
370
Instrs.push_back(Curr);
371
break;
372
}
373
case LoongArch::LU12I_W: {
374
MachineOperand ImmOp = Curr->getOperand(1);
375
if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
376
return false;
377
Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
378
Reg = LoongArch::R0;
379
Instrs.push_back(Curr);
380
break;
381
}
382
case LoongArch::LU32I_D: {
383
MachineOperand ImmOp = Curr->getOperand(2);
384
if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
385
return false;
386
Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
387
Mask ^= 0x000FFFFF00000000ULL;
388
Reg = Curr->getOperand(1).getReg();
389
Instrs.push_back(Curr);
390
break;
391
}
392
case LoongArch::LU52I_D: {
393
MachineOperand ImmOp = Curr->getOperand(2);
394
if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
395
return false;
396
Offset += ImmOp.getImm() << 52;
397
Mask ^= 0xFFF0000000000000ULL;
398
Reg = Curr->getOperand(1).getReg();
399
Instrs.push_back(Curr);
400
break;
401
}
402
}
403
}
404
405
// Can't fold if the offset is not extracted.
406
if (!Offset)
407
return false;
408
409
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
410
LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
411
for (auto I : Instrs) {
412
LLVM_DEBUG(dbgs() << " " << *I);
413
I->eraseFromParent();
414
}
415
416
return true;
417
}
418
419
bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
420
MachineInstr &Lo12,
421
MachineInstr *&Lo20,
422
MachineInstr *&Hi12,
423
MachineInstr *&Last) {
424
Register DestReg =
425
Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
426
427
// Look for arithmetic instructions we can get an offset from.
428
// We might be able to remove the arithmetic instructions by folding the
429
// offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
430
// LU12I_W+PseudoAddTPRel+ADDI.
431
if (!MRI->hasOneUse(DestReg))
432
return false;
433
434
// DestReg has only one use.
435
MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
436
switch (Tail.getOpcode()) {
437
default:
438
LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
439
<< Tail);
440
break;
441
case LoongArch::ADDI_W:
442
if (ST->is64Bit())
443
return false;
444
[[fallthrough]];
445
case LoongArch::ADDI_D:
446
case LoongArch::ADDU16I_D: {
447
// Offset is simply an immediate operand.
448
int64_t Offset = Tail.getOperand(2).getImm();
449
if (Tail.getOpcode() == LoongArch::ADDU16I_D)
450
Offset = SignExtend64<32>(Offset << 16);
451
452
// We might have two ADDIs in a row.
453
Register TailDestReg = Tail.getOperand(0).getReg();
454
if (MRI->hasOneUse(TailDestReg)) {
455
MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
456
if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
457
return false;
458
if (TailTail.getOpcode() == LoongArch::ADDI_W ||
459
TailTail.getOpcode() == LoongArch::ADDI_D) {
460
Offset += TailTail.getOperand(2).getImm();
461
LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
462
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
463
Tail.eraseFromParent();
464
return true;
465
}
466
}
467
468
LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
469
foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
470
return true;
471
}
472
case LoongArch::ADD_W:
473
if (ST->is64Bit())
474
return false;
475
[[fallthrough]];
476
case LoongArch::ADD_D:
477
// The offset is too large to fit in the immediate field of ADDI.
478
return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
479
break;
480
}
481
482
return false;
483
}
484
485
// Memory access opcode mapping for transforms.
486
static unsigned getNewOpc(unsigned Op, bool isLarge) {
487
switch (Op) {
488
case LoongArch::LD_B:
489
return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
490
case LoongArch::LD_H:
491
return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
492
case LoongArch::LD_W:
493
case LoongArch::LDPTR_W:
494
return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
495
case LoongArch::LD_D:
496
case LoongArch::LDPTR_D:
497
return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
498
case LoongArch::LD_BU:
499
return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
500
case LoongArch::LD_HU:
501
return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
502
case LoongArch::LD_WU:
503
return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
504
case LoongArch::FLD_S:
505
return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
506
case LoongArch::FLD_D:
507
return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
508
case LoongArch::VLD:
509
return isLarge ? LoongArch::VLDX : LoongArch::VLD;
510
case LoongArch::XVLD:
511
return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
512
case LoongArch::VLDREPL_B:
513
return LoongArch::VLDREPL_B;
514
case LoongArch::XVLDREPL_B:
515
return LoongArch::XVLDREPL_B;
516
case LoongArch::ST_B:
517
return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
518
case LoongArch::ST_H:
519
return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
520
case LoongArch::ST_W:
521
case LoongArch::STPTR_W:
522
return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
523
case LoongArch::ST_D:
524
case LoongArch::STPTR_D:
525
return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
526
case LoongArch::FST_S:
527
return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
528
case LoongArch::FST_D:
529
return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
530
case LoongArch::VST:
531
return isLarge ? LoongArch::VSTX : LoongArch::VST;
532
case LoongArch::XVST:
533
return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
534
default:
535
llvm_unreachable("Unexpected opcode for replacement");
536
}
537
}
538
539
bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
540
MachineInstr &Lo12,
541
MachineInstr *&Lo20,
542
MachineInstr *&Hi12,
543
MachineInstr *&Last) {
544
Register DestReg =
545
Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
546
547
// If all the uses are memory ops with the same offset, we can transform:
548
//
549
// 1. (small/medium):
550
// 1.1. pcala
551
// pcalau12i vreg1, %pc_hi20(s)
552
// addi.d vreg2, vreg1, %pc_lo12(s)
553
// ld.w vreg3, 8(vreg2)
554
//
555
// =>
556
//
557
// pcalau12i vreg1, %pc_hi20(s+8)
558
// ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
559
//
560
// 1.2. tls-le
561
// lu12i.w vreg1, %le_hi20_r(s)
562
// add.w/d vreg2, vreg1, r2, %le_add_r(s)
563
// addi.w/d vreg3, vreg2, %le_lo12_r(s)
564
// ld.w vreg4, 8(vreg3)
565
//
566
// =>
567
//
568
// lu12i.w vreg1, %le_hi20_r(s+8)
569
// add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
570
// ld.w vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
571
//
572
// 2. (large):
573
// pcalau12i vreg1, %pc_hi20(s)
574
// addi.d vreg2, $zero, %pc_lo12(s)
575
// lu32i.d vreg3, vreg2, %pc64_lo20(s)
576
// lu52i.d vreg4, vreg3, %pc64_hi12(s)
577
// add.d vreg5, vreg4, vreg1
578
// ld.w vreg6, 8(vreg5)
579
//
580
// =>
581
//
582
// pcalau12i vreg1, %pc_hi20(s+8)
583
// addi.d vreg2, $zero, %pc_lo12(s+8)
584
// lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
585
// lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
586
// ldx.w vreg6, vreg4, vreg1
587
588
std::optional<int64_t> CommonOffset;
589
DenseMap<const MachineInstr *, SmallVector<unsigned>>
590
InlineAsmMemoryOpIndexesMap;
591
for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
592
switch (UseMI.getOpcode()) {
593
default:
594
LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
595
return false;
596
case LoongArch::VLDREPL_B:
597
case LoongArch::XVLDREPL_B:
598
// We can't do this for large pattern.
599
if (Last)
600
return false;
601
[[fallthrough]];
602
case LoongArch::LD_B:
603
case LoongArch::LD_H:
604
case LoongArch::LD_W:
605
case LoongArch::LD_D:
606
case LoongArch::LD_BU:
607
case LoongArch::LD_HU:
608
case LoongArch::LD_WU:
609
case LoongArch::LDPTR_W:
610
case LoongArch::LDPTR_D:
611
case LoongArch::FLD_S:
612
case LoongArch::FLD_D:
613
case LoongArch::VLD:
614
case LoongArch::XVLD:
615
case LoongArch::ST_B:
616
case LoongArch::ST_H:
617
case LoongArch::ST_W:
618
case LoongArch::ST_D:
619
case LoongArch::STPTR_W:
620
case LoongArch::STPTR_D:
621
case LoongArch::FST_S:
622
case LoongArch::FST_D:
623
case LoongArch::VST:
624
case LoongArch::XVST: {
625
if (UseMI.getOperand(1).isFI())
626
return false;
627
// Register defined by Lo should not be the value register.
628
if (DestReg == UseMI.getOperand(0).getReg())
629
return false;
630
assert(DestReg == UseMI.getOperand(1).getReg() &&
631
"Expected base address use");
632
// All load/store instructions must use the same offset.
633
int64_t Offset = UseMI.getOperand(2).getImm();
634
if (CommonOffset && Offset != CommonOffset)
635
return false;
636
CommonOffset = Offset;
637
break;
638
}
639
case LoongArch::INLINEASM:
640
case LoongArch::INLINEASM_BR: {
641
// We can't do this for large pattern.
642
if (Last)
643
return false;
644
SmallVector<unsigned> InlineAsmMemoryOpIndexes;
645
unsigned NumOps = 0;
646
for (unsigned I = InlineAsm::MIOp_FirstOperand;
647
I < UseMI.getNumOperands(); I += 1 + NumOps) {
648
const MachineOperand &FlagsMO = UseMI.getOperand(I);
649
// Should be an imm.
650
if (!FlagsMO.isImm())
651
continue;
652
653
const InlineAsm::Flag Flags(FlagsMO.getImm());
654
NumOps = Flags.getNumOperandRegisters();
655
656
// Memory constraints have two operands.
657
if (NumOps != 2 || !Flags.isMemKind()) {
658
// If the register is used by something other than a memory contraint,
659
// we should not fold.
660
for (unsigned J = 0; J < NumOps; ++J) {
661
const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
662
if (MO.isReg() && MO.getReg() == DestReg)
663
return false;
664
}
665
continue;
666
}
667
668
// We can only do this for constraint m.
669
if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
670
return false;
671
672
const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
673
if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
674
continue;
675
676
const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
677
if (!OffsetMO.isImm())
678
continue;
679
680
// All inline asm memory operands must use the same offset.
681
int64_t Offset = OffsetMO.getImm();
682
if (CommonOffset && Offset != CommonOffset)
683
return false;
684
CommonOffset = Offset;
685
InlineAsmMemoryOpIndexes.push_back(I + 1);
686
}
687
InlineAsmMemoryOpIndexesMap.insert(
688
std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
689
break;
690
}
691
}
692
}
693
694
// We found a common offset.
695
// Update the offsets in global address lowering.
696
// We may have already folded some arithmetic so we need to add to any
697
// existing offset.
698
int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
699
// LA32 ignores the upper 32 bits.
700
if (!ST->is64Bit())
701
NewOffset = SignExtend64<32>(NewOffset);
702
// We can only fold simm32 offsets.
703
if (!isInt<32>(NewOffset))
704
return false;
705
706
// If optimized by this pass successfully, MO_RELAX bitmask target-flag should
707
// be removed from the pcala code sequence. Code sequence of tls-le can still
708
// be relaxed after being optimized.
709
//
710
// For example:
711
// pcalau12i $a0, %pc_hi20(symbol)
712
// addi.d $a0, $a0, %pc_lo12(symbol)
713
// ld.w $a0, $a0, 0
714
//
715
// =>
716
//
717
// pcalau12i $a0, %pc_hi20(symbol)
718
// ld.w $a0, $a0, %pc_lo12(symbol)
719
//
720
// Code sequence optimized before can be relax by linker. But after being
721
// optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
722
// carried by them.
723
Hi20.getOperand(1).setOffset(NewOffset);
724
MachineOperand &ImmOp = Lo12.getOperand(2);
725
ImmOp.setOffset(NewOffset);
726
if (Lo20 && Hi12) {
727
Lo20->getOperand(2).setOffset(NewOffset);
728
Hi12->getOperand(2).setOffset(NewOffset);
729
}
730
if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
731
Hi20.getOperand(1).setTargetFlags(
732
LoongArchII::getDirectFlags(Hi20.getOperand(1)));
733
ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
734
} else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
735
MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
736
Add->getOperand(3).setOffset(NewOffset);
737
}
738
739
// Update the immediate in the load/store instructions to add the offset.
740
const LoongArchInstrInfo &TII = *ST->getInstrInfo();
741
for (MachineInstr &UseMI :
742
llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
743
if (UseMI.getOpcode() == LoongArch::INLINEASM ||
744
UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
745
auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
746
for (unsigned I : InlineAsmMemoryOpIndexes) {
747
MachineOperand &MO = UseMI.getOperand(I + 1);
748
switch (ImmOp.getType()) {
749
case MachineOperand::MO_GlobalAddress:
750
MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
751
LoongArchII::getDirectFlags(ImmOp));
752
break;
753
case MachineOperand::MO_MCSymbol:
754
MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
755
LoongArchII::getDirectFlags(ImmOp));
756
MO.setOffset(ImmOp.getOffset());
757
break;
758
case MachineOperand::MO_BlockAddress:
759
MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
760
LoongArchII::getDirectFlags(ImmOp));
761
break;
762
case MachineOperand::MO_ConstantPoolIndex:
763
MO.ChangeToCPI(ImmOp.getIndex(), ImmOp.getOffset(),
764
LoongArchII::getDirectFlags(ImmOp));
765
break;
766
default:
767
report_fatal_error("unsupported machine operand type");
768
break;
769
}
770
}
771
} else {
772
UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
773
if (Last) {
774
UseMI.removeOperand(2);
775
UseMI.removeOperand(1);
776
UseMI.addOperand(Last->getOperand(1));
777
UseMI.addOperand(Last->getOperand(2));
778
UseMI.getOperand(1).setIsKill(false);
779
UseMI.getOperand(2).setIsKill(false);
780
} else {
781
UseMI.removeOperand(2);
782
UseMI.addOperand(ImmOp);
783
}
784
}
785
}
786
787
if (Last) {
788
Last->eraseFromParent();
789
return true;
790
}
791
792
if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
793
MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
794
Hi20.getOperand(0).getReg());
795
} else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
796
MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
797
MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
798
Add->getOperand(0).getReg());
799
}
800
Lo12.eraseFromParent();
801
return true;
802
}
803
804
/// Pass entry point: scan every instruction of \p Fn for the head of a pcala
/// or tls-le address sequence and try both folding strategies on each match.
///
/// \returns true if any instruction was changed.
bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<LoongArchSubtarget>();
  MRI = &Fn.getRegInfo();

  bool Changed = false;
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi20 : MBB) {
      MachineInstr *Lo12 = nullptr;
      MachineInstr *Lo20 = nullptr;
      MachineInstr *Hi12 = nullptr;
      MachineInstr *Last = nullptr;

      bool Matched = false;
      switch (Hi20.getOpcode()) {
      case LoongArch::PCALAU12I:
        // pcala code sequence in the small/medium/large code model.
        Matched = detectFoldable(Hi20, Lo12, Lo20, Hi12, Last);
        break;
      case LoongArch::LU12I_W: {
        // tls-le code sequence in the small/medium code model.
        MachineInstr *Add = nullptr;
        Matched = detectFoldable(Hi20, Add, Lo12);
        break;
      }
      default:
        break;
      }
      if (!Matched)
        continue;

      // For tls-le the PseudoAddTPRel instruction is not passed on so that
      // the same hooks can be reused; Lo20, Hi12 and Last are nullptr in that
      // case.
      const bool FoldedArith = detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
      const bool FoldedMemOps = foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
      if (FoldedArith || FoldedMemOps)
        Changed = true;
    }
  }

  return Changed;
}
841
842
/// Returns an instance of the Merge Base Offset Optimization pass.
843
FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
  // Allocate a fresh pass object and hand it back as a FunctionPass pointer.
  FunctionPass *Pass = new LoongArchMergeBaseOffsetOpt();
  return Pass;
}
846
847