Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
35271 views
1
//===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Utility function to lower a printf call into a series of device
10
// library calls on the AMDGPU target.
11
//
12
// WARNING: This file knows about certain library functions. It recognizes them
13
// by name, and hardwires knowledge of their semantics.
14
//
15
//===----------------------------------------------------------------------===//
16
17
#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
18
#include "llvm/ADT/SparseBitVector.h"
19
#include "llvm/ADT/StringExtras.h"
20
#include "llvm/Analysis/ValueTracking.h"
21
#include "llvm/IR/Module.h"
22
#include "llvm/Support/DataExtractor.h"
23
#include "llvm/Support/MD5.h"
24
#include "llvm/Support/MathExtras.h"
25
26
using namespace llvm;
27
28
#define DEBUG_TYPE "amdgpu-emit-printf"
29
30
static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
31
auto Int64Ty = Builder.getInt64Ty();
32
auto Ty = Arg->getType();
33
34
if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
35
switch (IntTy->getBitWidth()) {
36
case 32:
37
return Builder.CreateZExt(Arg, Int64Ty);
38
case 64:
39
return Arg;
40
}
41
}
42
43
if (Ty->getTypeID() == Type::DoubleTyID) {
44
return Builder.CreateBitCast(Arg, Int64Ty);
45
}
46
47
if (isa<PointerType>(Ty)) {
48
return Builder.CreatePtrToInt(Arg, Int64Ty);
49
}
50
51
llvm_unreachable("unexpected type");
52
}
53
54
static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
55
auto Int64Ty = Builder.getInt64Ty();
56
auto M = Builder.GetInsertBlock()->getModule();
57
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
58
return Builder.CreateCall(Fn, Version);
59
}
60
61
static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
62
Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
63
Value *Arg4, Value *Arg5, Value *Arg6,
64
bool IsLast) {
65
auto Int64Ty = Builder.getInt64Ty();
66
auto Int32Ty = Builder.getInt32Ty();
67
auto M = Builder.GetInsertBlock()->getModule();
68
auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
69
Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
70
Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
71
auto IsLastValue = Builder.getInt32(IsLast);
72
auto NumArgsValue = Builder.getInt32(NumArgs);
73
return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
74
Arg4, Arg5, Arg6, IsLastValue});
75
}
76
77
static Value *appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
78
bool IsLast) {
79
auto Arg0 = fitArgInto64Bits(Builder, Arg);
80
auto Zero = Builder.getInt64(0);
81
return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
82
Zero, IsLast);
83
}
84
85
// The device library does not provide strlen, so we build our own loop
86
// here. While we are at it, we also include the terminating null in the length.
87
static Value *getStrlenWithNull(IRBuilder<> &Builder, Value *Str) {
88
auto *Prev = Builder.GetInsertBlock();
89
Module *M = Prev->getModule();
90
91
auto CharZero = Builder.getInt8(0);
92
auto One = Builder.getInt64(1);
93
auto Zero = Builder.getInt64(0);
94
auto Int64Ty = Builder.getInt64Ty();
95
96
// The length is either zero for a null pointer, or the computed value for an
97
// actual string. We need a join block for a phi that represents the final
98
// value.
99
//
100
// Strictly speaking, the zero does not matter since
101
// __ockl_printf_append_string_n ignores the length if the pointer is null.
102
BasicBlock *Join = nullptr;
103
if (Prev->getTerminator()) {
104
Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
105
"strlen.join");
106
Prev->getTerminator()->eraseFromParent();
107
} else {
108
Join = BasicBlock::Create(M->getContext(), "strlen.join",
109
Prev->getParent());
110
}
111
BasicBlock *While =
112
BasicBlock::Create(M->getContext(), "strlen.while",
113
Prev->getParent(), Join);
114
BasicBlock *WhileDone = BasicBlock::Create(
115
M->getContext(), "strlen.while.done",
116
Prev->getParent(), Join);
117
118
// Emit an early return for when the pointer is null.
119
Builder.SetInsertPoint(Prev);
120
auto CmpNull =
121
Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
122
BranchInst::Create(Join, While, CmpNull, Prev);
123
124
// Entry to the while loop.
125
Builder.SetInsertPoint(While);
126
127
auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
128
PtrPhi->addIncoming(Str, Prev);
129
auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
130
PtrPhi->addIncoming(PtrNext, While);
131
132
// Condition for the while loop.
133
auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
134
auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
135
Builder.CreateCondBr(Cmp, WhileDone, While);
136
137
// Add one to the computed length.
138
Builder.SetInsertPoint(WhileDone, WhileDone->begin());
139
auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
140
auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
141
auto Len = Builder.CreateSub(End, Begin);
142
Len = Builder.CreateAdd(Len, One);
143
144
// Final join.
145
BranchInst::Create(Join, WhileDone);
146
Builder.SetInsertPoint(Join, Join->begin());
147
auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
148
LenPhi->addIncoming(Len, WhileDone);
149
LenPhi->addIncoming(Zero, Prev);
150
151
return LenPhi;
152
}
153
154
static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
155
Value *Length, bool isLast) {
156
auto Int64Ty = Builder.getInt64Ty();
157
auto IsLastInt32 = Builder.getInt32(isLast);
158
auto M = Builder.GetInsertBlock()->getModule();
159
auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
160
Desc->getType(), Str->getType(),
161
Length->getType(), IsLastInt32->getType());
162
return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
163
}
164
165
static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
166
bool IsLast) {
167
auto Length = getStrlenWithNull(Builder, Arg);
168
return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
169
}
170
171
static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
172
bool SpecIsCString, bool IsLast) {
173
if (SpecIsCString && isa<PointerType>(Arg->getType())) {
174
return appendString(Builder, Desc, Arg, IsLast);
175
}
176
// If the format specifies a string but the argument is not, the frontend will
177
// have printed a warning. We just rely on undefined behaviour and send the
178
// argument anyway.
179
return appendArg(Builder, Desc, Arg, IsLast);
180
}
181
182
// Scan the format string to locate all specifiers, and mark the ones that
183
// specify a string, i.e, the "%s" specifier with optional '*' characters.
184
static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
185
static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
186
size_t SpecPos = 0;
187
// Skip the first argument, the format string.
188
unsigned ArgIdx = 1;
189
190
while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
191
if (Str[SpecPos + 1] == '%') {
192
SpecPos += 2;
193
continue;
194
}
195
auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
196
if (SpecEnd == StringRef::npos)
197
return;
198
auto Spec = Str.slice(SpecPos, SpecEnd + 1);
199
ArgIdx += Spec.count('*');
200
if (Str[SpecEnd] == 's') {
201
BV.set(ArgIdx);
202
}
203
SpecPos = SpecEnd + 1;
204
++ArgIdx;
205
}
206
}
207
208
// helper struct to package the string related data
209
struct StringData {
210
StringRef Str;
211
Value *RealSize = nullptr;
212
Value *AlignedSize = nullptr;
213
bool IsConst = true;
214
215
StringData(StringRef ST, Value *RS, Value *AS, bool IC)
216
: Str(ST), RealSize(RS), AlignedSize(AS), IsConst(IC) {}
217
};
218
219
// Calculates frame size required for current printf expansion and allocates
220
// space on printf buffer. Printf frame includes following contents
221
// [ ControlDWord , format string/Hash , Arguments (each aligned to 8 byte) ]
222
static Value *callBufferedPrintfStart(
223
IRBuilder<> &Builder, ArrayRef<Value *> Args, Value *Fmt,
224
bool isConstFmtStr, SparseBitVector<8> &SpecIsCString,
225
SmallVectorImpl<StringData> &StringContents, Value *&ArgSize) {
226
Module *M = Builder.GetInsertBlock()->getModule();
227
Value *NonConstStrLen = nullptr;
228
Value *LenWithNull = nullptr;
229
Value *LenWithNullAligned = nullptr;
230
Value *TempAdd = nullptr;
231
232
// First 4 bytes to be reserved for control dword
233
size_t BufSize = 4;
234
if (isConstFmtStr)
235
// First 8 bytes of MD5 hash
236
BufSize += 8;
237
else {
238
LenWithNull = getStrlenWithNull(Builder, Fmt);
239
240
// Align the computed length to next 8 byte boundary
241
TempAdd = Builder.CreateAdd(LenWithNull,
242
ConstantInt::get(LenWithNull->getType(), 7U));
243
NonConstStrLen = Builder.CreateAnd(
244
TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
245
246
StringContents.push_back(
247
StringData(StringRef(), LenWithNull, NonConstStrLen, false));
248
}
249
250
for (size_t i = 1; i < Args.size(); i++) {
251
if (SpecIsCString.test(i)) {
252
StringRef ArgStr;
253
if (getConstantStringInfo(Args[i], ArgStr)) {
254
auto alignedLen = alignTo(ArgStr.size() + 1, 8);
255
StringContents.push_back(StringData(
256
ArgStr,
257
/*RealSize*/ nullptr, /*AlignedSize*/ nullptr, /*IsConst*/ true));
258
BufSize += alignedLen;
259
} else {
260
LenWithNull = getStrlenWithNull(Builder, Args[i]);
261
262
// Align the computed length to next 8 byte boundary
263
TempAdd = Builder.CreateAdd(
264
LenWithNull, ConstantInt::get(LenWithNull->getType(), 7U));
265
LenWithNullAligned = Builder.CreateAnd(
266
TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
267
268
if (NonConstStrLen) {
269
auto Val = Builder.CreateAdd(LenWithNullAligned, NonConstStrLen,
270
"cumulativeAdd");
271
NonConstStrLen = Val;
272
} else
273
NonConstStrLen = LenWithNullAligned;
274
275
StringContents.push_back(
276
StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
277
}
278
} else {
279
int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
280
// We end up expanding non string arguments to 8 bytes
281
// (args smaller than 8 bytes)
282
BufSize += std::max(AllocSize, 8);
283
}
284
}
285
286
// calculate final size value to be passed to printf_alloc
287
Value *SizeToReserve = ConstantInt::get(Builder.getInt64Ty(), BufSize, false);
288
SmallVector<Value *, 1> Alloc_args;
289
if (NonConstStrLen)
290
SizeToReserve = Builder.CreateAdd(NonConstStrLen, SizeToReserve);
291
292
ArgSize = Builder.CreateTrunc(SizeToReserve, Builder.getInt32Ty());
293
Alloc_args.push_back(ArgSize);
294
295
// call the printf_alloc function
296
AttributeList Attr = AttributeList::get(
297
Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind);
298
299
Type *Tys_alloc[1] = {Builder.getInt32Ty()};
300
Type *PtrTy =
301
Builder.getPtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
302
FunctionType *FTy_alloc = FunctionType::get(PtrTy, Tys_alloc, false);
303
auto PrintfAllocFn =
304
M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
305
306
return Builder.CreateCall(PrintfAllocFn, Alloc_args, "printf_alloc_fn");
307
}
308
309
// Prepare constant string argument to push onto the buffer
310
static void processConstantStringArg(StringData *SD, IRBuilder<> &Builder,
311
SmallVectorImpl<Value *> &WhatToStore) {
312
std::string Str(SD->Str.str() + '\0');
313
314
DataExtractor Extractor(Str, /*IsLittleEndian=*/true, 8);
315
DataExtractor::Cursor Offset(0);
316
while (Offset && Offset.tell() < Str.size()) {
317
const uint64_t ReadSize = 4;
318
uint64_t ReadNow = std::min(ReadSize, Str.size() - Offset.tell());
319
uint64_t ReadBytes = 0;
320
switch (ReadNow) {
321
default:
322
llvm_unreachable("min(4, X) > 4?");
323
case 1:
324
ReadBytes = Extractor.getU8(Offset);
325
break;
326
case 2:
327
ReadBytes = Extractor.getU16(Offset);
328
break;
329
case 3:
330
ReadBytes = Extractor.getU24(Offset);
331
break;
332
case 4:
333
ReadBytes = Extractor.getU32(Offset);
334
break;
335
}
336
cantFail(Offset.takeError(), "failed to read bytes from constant array");
337
338
APInt IntVal(8 * ReadSize, ReadBytes);
339
340
// TODO: Should not bother aligning up.
341
if (ReadNow < ReadSize)
342
IntVal = IntVal.zext(8 * ReadSize);
343
344
Type *IntTy = Type::getIntNTy(Builder.getContext(), IntVal.getBitWidth());
345
WhatToStore.push_back(ConstantInt::get(IntTy, IntVal));
346
}
347
// Additional padding for 8 byte alignment
348
int Rem = (Str.size() % 8);
349
if (Rem > 0 && Rem <= 4)
350
WhatToStore.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
351
}
352
353
static Value *processNonStringArg(Value *Arg, IRBuilder<> &Builder) {
354
const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
355
auto Ty = Arg->getType();
356
357
if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
358
if (IntTy->getBitWidth() < 64) {
359
return Builder.CreateZExt(Arg, Builder.getInt64Ty());
360
}
361
}
362
363
if (Ty->isFloatingPointTy()) {
364
if (DL.getTypeAllocSize(Ty) < 8) {
365
return Builder.CreateFPExt(Arg, Builder.getDoubleTy());
366
}
367
}
368
369
return Arg;
370
}
371
372
static void
373
callBufferedPrintfArgPush(IRBuilder<> &Builder, ArrayRef<Value *> Args,
374
Value *PtrToStore, SparseBitVector<8> &SpecIsCString,
375
SmallVectorImpl<StringData> &StringContents,
376
bool IsConstFmtStr) {
377
Module *M = Builder.GetInsertBlock()->getModule();
378
const DataLayout &DL = M->getDataLayout();
379
auto StrIt = StringContents.begin();
380
size_t i = IsConstFmtStr ? 1 : 0;
381
for (; i < Args.size(); i++) {
382
SmallVector<Value *, 32> WhatToStore;
383
if ((i == 0) || SpecIsCString.test(i)) {
384
if (StrIt->IsConst) {
385
processConstantStringArg(StrIt, Builder, WhatToStore);
386
StrIt++;
387
} else {
388
// This copies the contents of the string, however the next offset
389
// is at aligned length, the extra space that might be created due
390
// to alignment padding is not populated with any specific value
391
// here. This would be safe as long as runtime is sync with
392
// the offsets.
393
Builder.CreateMemCpy(PtrToStore, /*DstAlign*/ Align(1), Args[i],
394
/*SrcAlign*/ Args[i]->getPointerAlignment(DL),
395
StrIt->RealSize);
396
397
PtrToStore =
398
Builder.CreateInBoundsGEP(Builder.getInt8Ty(), PtrToStore,
399
{StrIt->AlignedSize}, "PrintBuffNextPtr");
400
LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:"
401
<< *PtrToStore << '\n');
402
403
// done with current argument, move to next
404
StrIt++;
405
continue;
406
}
407
} else {
408
WhatToStore.push_back(processNonStringArg(Args[i], Builder));
409
}
410
411
for (Value *toStore : WhatToStore) {
412
StoreInst *StBuff = Builder.CreateStore(toStore, PtrToStore);
413
LLVM_DEBUG(dbgs() << "inserting store to printf buffer:" << *StBuff
414
<< '\n');
415
(void)StBuff;
416
PtrToStore = Builder.CreateConstInBoundsGEP1_32(
417
Builder.getInt8Ty(), PtrToStore,
418
M->getDataLayout().getTypeAllocSize(toStore->getType()),
419
"PrintBuffNextPtr");
420
LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:" << *PtrToStore
421
<< '\n');
422
}
423
}
424
}
425
426
Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args,
427
bool IsBuffered) {
428
auto NumOps = Args.size();
429
assert(NumOps >= 1);
430
431
auto Fmt = Args[0];
432
SparseBitVector<8> SpecIsCString;
433
StringRef FmtStr;
434
435
if (getConstantStringInfo(Fmt, FmtStr))
436
locateCStrings(SpecIsCString, FmtStr);
437
438
if (IsBuffered) {
439
SmallVector<StringData, 8> StringContents;
440
Module *M = Builder.GetInsertBlock()->getModule();
441
LLVMContext &Ctx = Builder.getContext();
442
auto Int8Ty = Builder.getInt8Ty();
443
auto Int32Ty = Builder.getInt32Ty();
444
bool IsConstFmtStr = !FmtStr.empty();
445
446
Value *ArgSize = nullptr;
447
Value *Ptr =
448
callBufferedPrintfStart(Builder, Args, Fmt, IsConstFmtStr,
449
SpecIsCString, StringContents, ArgSize);
450
451
// The buffered version still follows OpenCL printf standards for
452
// printf return value, i.e 0 on success, -1 on failure.
453
ConstantPointerNull *zeroIntPtr =
454
ConstantPointerNull::get(cast<PointerType>(Ptr->getType()));
455
456
auto *Cmp = cast<ICmpInst>(Builder.CreateICmpNE(Ptr, zeroIntPtr, ""));
457
458
BasicBlock *End = BasicBlock::Create(Ctx, "end.block",
459
Builder.GetInsertBlock()->getParent());
460
BasicBlock *ArgPush = BasicBlock::Create(
461
Ctx, "argpush.block", Builder.GetInsertBlock()->getParent());
462
463
BranchInst::Create(ArgPush, End, Cmp, Builder.GetInsertBlock());
464
Builder.SetInsertPoint(ArgPush);
465
466
// Create controlDWord and store as the first entry, format as follows
467
// Bit 0 (LSB) -> stream (1 if stderr, 0 if stdout, printf always outputs to
468
// stdout) Bit 1 -> constant format string (1 if constant) Bits 2-31 -> size
469
// of printf data frame
470
auto ConstantTwo = Builder.getInt32(2);
471
auto ControlDWord = Builder.CreateShl(ArgSize, ConstantTwo);
472
if (IsConstFmtStr)
473
ControlDWord = Builder.CreateOr(ControlDWord, ConstantTwo);
474
475
Builder.CreateStore(ControlDWord, Ptr);
476
477
Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 4);
478
479
// Create MD5 hash for costant format string, push low 64 bits of the
480
// same onto buffer and metadata.
481
NamedMDNode *metaD = M->getOrInsertNamedMetadata("llvm.printf.fmts");
482
if (IsConstFmtStr) {
483
MD5 Hasher;
484
MD5::MD5Result Hash;
485
Hasher.update(FmtStr);
486
Hasher.final(Hash);
487
488
// Try sticking to llvm.printf.fmts format, although we are not going to
489
// use the ID and argument size fields while printing,
490
std::string MetadataStr =
491
"0:0:" + llvm::utohexstr(Hash.low(), /*LowerCase=*/true) + "," +
492
FmtStr.str();
493
MDString *fmtStrArray = MDString::get(Ctx, MetadataStr);
494
MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
495
metaD->addOperand(myMD);
496
497
Builder.CreateStore(Builder.getInt64(Hash.low()), Ptr);
498
Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 8);
499
} else {
500
// Include a dummy metadata instance in case of only non constant
501
// format string usage, This might be an absurd usecase but needs to
502
// be done for completeness
503
if (metaD->getNumOperands() == 0) {
504
MDString *fmtStrArray =
505
MDString::get(Ctx, "0:0:ffffffff,\"Non const format string\"");
506
MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
507
metaD->addOperand(myMD);
508
}
509
}
510
511
// Push The printf arguments onto buffer
512
callBufferedPrintfArgPush(Builder, Args, Ptr, SpecIsCString, StringContents,
513
IsConstFmtStr);
514
515
// End block, returns -1 on failure
516
BranchInst::Create(End, ArgPush);
517
Builder.SetInsertPoint(End);
518
return Builder.CreateSExt(Builder.CreateNot(Cmp), Int32Ty, "printf_result");
519
}
520
521
auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
522
Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
523
524
// FIXME: This invokes hostcall once for each argument. We can pack up to
525
// seven scalar printf arguments in a single hostcall. See the signature of
526
// callAppendArgs().
527
for (unsigned int i = 1; i != NumOps; ++i) {
528
bool IsLast = i == NumOps - 1;
529
bool IsCString = SpecIsCString.test(i);
530
Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
531
}
532
533
return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
534
}
535
536