Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/common/tests/mi_builder_test.cpp
4547 views
1
/*
2
* Copyright © 2019 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include <fcntl.h>
25
#include <string.h>
26
#include <xf86drm.h>
27
28
#include <gtest/gtest.h>
29
30
#include "dev/intel_device_info.h"
31
#include "drm-uapi/i915_drm.h"
32
#include "genxml/gen_macros.h"
33
#include "util/macros.h"
34
35
/* Forward declaration: the genxml hooks declared below only need the name of
 * the fixture, which is defined later in this file.
 */
class mi_builder_test;

/* A GPU-visible location described as a GEM buffer handle plus a byte offset
 * into that buffer.
 */
struct address {
   uint32_t gem_handle; /* GEM handle of the buffer object */
   uint32_t offset;     /* byte offset from the start of that buffer */
};
41
42
#define __gen_address_type struct address
43
#define __gen_user_data ::mi_builder_test
44
45
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
46
struct address addr, uint32_t delta);
47
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
48
struct address __gen_get_batch_address(mi_builder_test *test,
49
void *location);
50
51
struct address
52
__gen_address_offset(address addr, uint64_t offset)
53
{
54
addr.offset += offset;
55
return addr;
56
}
57
58
/* MMIO register the register tests below use as a 64-bit scratch location.
 * The chosen register depends on the hardware generation being built for.
 */
#if GFX_VERx10 >= 75
59
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
60
#else
61
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
62
#endif
63
/* Presumably read by mi_builder.h to size its pool of allocatable GPRs --
 * confirm there.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 15
64
/* Byte offsets of the input and output regions inside the data BO. */
#define INPUT_DATA_OFFSET 0
65
#define OUTPUT_DATA_OFFSET 2048
66
67
/* Token-pasting helpers that turn a command name into the matching
 * _length / _length_bias / _header / _pack identifier.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
68
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
69
#define __genxml_cmd_header(cmd) cmd ## _header
70
#define __genxml_cmd_pack(cmd) cmd ## _pack
71
72
#include "genxml/genX_pack.h"
73
#include "mi_builder.h"
74
75
/* Emit one genxml command from inside a mi_builder_test member function.
 *
 * Expands to a for-loop header that
 *   - declares `name`, a `struct cmd` initialised from the command's header,
 *   - reserves __genxml_cmd_length(cmd) dwords with emit_dwords(),
 *   - runs the statement/block that follows the macro exactly once so the
 *     caller can fill in fields of `name`, and
 *   - packs `name` into the reserved dwords, then clears _dst to end the loop.
 *
 * It references `this` and emit_dwords(), so it only works inside the fixture.
 */
#define emit_cmd(cmd, name) \
76
for (struct cmd name = { __genxml_cmd_header(cmd) }, \
77
*_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
78
__builtin_expect(_dst != NULL, 1); \
79
__genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
80
81
#include <vector>
82
83
class mi_builder_test : public ::testing::Test {
84
public:
85
mi_builder_test();
86
~mi_builder_test();
87
88
void SetUp();
89
90
void *emit_dwords(int num_dwords);
91
void submit_batch();
92
93
inline address in_addr(uint32_t offset)
94
{
95
address addr;
96
addr.gem_handle = data_bo_handle;
97
addr.offset = INPUT_DATA_OFFSET + offset;
98
return addr;
99
}
100
101
inline address out_addr(uint32_t offset)
102
{
103
address addr;
104
addr.gem_handle = data_bo_handle;
105
addr.offset = OUTPUT_DATA_OFFSET + offset;
106
return addr;
107
}
108
109
inline mi_value in_mem64(uint32_t offset)
110
{
111
return mi_mem64(in_addr(offset));
112
}
113
114
inline mi_value in_mem32(uint32_t offset)
115
{
116
return mi_mem32(in_addr(offset));
117
}
118
119
inline mi_value out_mem64(uint32_t offset)
120
{
121
return mi_mem64(out_addr(offset));
122
}
123
124
inline mi_value out_mem32(uint32_t offset)
125
{
126
return mi_mem32(out_addr(offset));
127
}
128
129
int fd;
130
int ctx_id;
131
intel_device_info devinfo;
132
133
uint32_t batch_bo_handle;
134
#if GFX_VER >= 8
135
uint64_t batch_bo_addr;
136
#endif
137
uint32_t batch_offset;
138
void *batch_map;
139
140
#if GFX_VER < 8
141
std::vector<drm_i915_gem_relocation_entry> relocs;
142
#endif
143
144
uint32_t data_bo_handle;
145
#if GFX_VER >= 8
146
uint64_t data_bo_addr;
147
#endif
148
void *data_map;
149
char *input;
150
char *output;
151
uint64_t canary;
152
153
mi_builder b;
154
};
155
156
/* Only marks the fixture as having no device open yet; everything else is
 * set up in SetUp().
 */
mi_builder_test::mi_builder_test()
   : fd(-1)
{
}
159
160
/* Close the DRM file descriptor if SetUp() managed to open one.
 *
 * fd stays at -1 when no suitable device was found, so skip close() in that
 * case rather than issuing a call that can only fail with EBADF.
 */
mi_builder_test::~mi_builder_test()
{
   if (fd >= 0)
      close(fd);
}
164
165
// 1 MB of batch should be enough for anyone, right?
166
// (256 * 4096 bytes; emit_dwords() asserts that the batch stays below this.)
#define BATCH_BO_SIZE (256 * 4096)
167
// Size in bytes of the data BO; inputs start at INPUT_DATA_OFFSET and outputs
// at OUTPUT_DATA_OFFSET inside it.
#define DATA_BO_SIZE 4096
168
169
void
170
mi_builder_test::SetUp()
171
{
172
drmDevicePtr devices[8];
173
int max_devices = drmGetDevices2(0, devices, 8);
174
175
int i;
176
for (i = 0; i < max_devices; i++) {
177
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
178
devices[i]->bustype == DRM_BUS_PCI &&
179
devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
180
fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
181
if (fd < 0)
182
continue;
183
184
/* We don't really need to do this when running on hardware because
185
* we can just pull it from the drmDevice. However, without doing
186
* this, intel_dump_gpu gets a bit of heartburn and we can't use the
187
* --device option with it.
188
*/
189
int device_id;
190
drm_i915_getparam getparam = drm_i915_getparam();
191
getparam.param = I915_PARAM_CHIPSET_ID;
192
getparam.value = &device_id;
193
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
194
(void *)&getparam), 0) << strerror(errno);
195
196
ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo));
197
if (devinfo.ver != GFX_VER || devinfo.is_haswell != (GFX_VERx10 == 75)) {
198
close(fd);
199
fd = -1;
200
continue;
201
}
202
203
204
/* Found a device! */
205
break;
206
}
207
}
208
ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
209
210
drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create();
211
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
212
(void *)&ctx_create), 0) << strerror(errno);
213
ctx_id = ctx_create.ctx_id;
214
215
if (GFX_VER >= 8) {
216
/* On gfx8+, we require softpin */
217
int has_softpin;
218
drm_i915_getparam getparam = drm_i915_getparam();
219
getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;
220
getparam.value = &has_softpin;
221
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
222
(void *)&getparam), 0) << strerror(errno);
223
ASSERT_TRUE(has_softpin);
224
}
225
226
// Create the batch buffer
227
drm_i915_gem_create gem_create = drm_i915_gem_create();
228
gem_create.size = BATCH_BO_SIZE;
229
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
230
(void *)&gem_create), 0) << strerror(errno);
231
batch_bo_handle = gem_create.handle;
232
#if GFX_VER >= 8
233
batch_bo_addr = 0xffffffffdff70000ULL;
234
#endif
235
236
drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
237
gem_caching.handle = batch_bo_handle;
238
gem_caching.caching = I915_CACHING_CACHED;
239
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
240
(void *)&gem_caching), 0) << strerror(errno);
241
242
drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
243
gem_mmap.handle = batch_bo_handle;
244
gem_mmap.offset = 0;
245
gem_mmap.size = BATCH_BO_SIZE;
246
gem_mmap.flags = 0;
247
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
248
(void *)&gem_mmap), 0) << strerror(errno);
249
batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
250
251
// Start the batch at zero
252
batch_offset = 0;
253
254
// Create the data buffer
255
gem_create = drm_i915_gem_create();
256
gem_create.size = DATA_BO_SIZE;
257
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
258
(void *)&gem_create), 0) << strerror(errno);
259
data_bo_handle = gem_create.handle;
260
#if GFX_VER >= 8
261
data_bo_addr = 0xffffffffefff0000ULL;
262
#endif
263
264
gem_caching = drm_i915_gem_caching();
265
gem_caching.handle = data_bo_handle;
266
gem_caching.caching = I915_CACHING_CACHED;
267
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
268
(void *)&gem_caching), 0) << strerror(errno);
269
270
gem_mmap = drm_i915_gem_mmap();
271
gem_mmap.handle = data_bo_handle;
272
gem_mmap.offset = 0;
273
gem_mmap.size = DATA_BO_SIZE;
274
gem_mmap.flags = 0;
275
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
276
(void *)&gem_mmap), 0) << strerror(errno);
277
data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
278
input = (char *)data_map + INPUT_DATA_OFFSET;
279
output = (char *)data_map + OUTPUT_DATA_OFFSET;
280
281
// Fill the test data with garbage
282
memset(data_map, 139, DATA_BO_SIZE);
283
memset(&canary, 139, sizeof(canary));
284
285
mi_builder_init(&b, &devinfo, this);
286
}
287
288
void *
289
mi_builder_test::emit_dwords(int num_dwords)
290
{
291
void *ptr = (void *)((char *)batch_map + batch_offset);
292
batch_offset += num_dwords * 4;
293
assert(batch_offset < BATCH_BO_SIZE);
294
return ptr;
295
}
296
297
void
298
mi_builder_test::submit_batch()
299
{
300
mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
301
302
// Round batch up to an even number of dwords.
303
if (batch_offset & 4)
304
mi_builder_emit(&b, GENX(MI_NOOP), noop);
305
306
drm_i915_gem_exec_object2 objects[2];
307
memset(objects, 0, sizeof(objects));
308
309
objects[0].handle = data_bo_handle;
310
objects[0].relocation_count = 0;
311
objects[0].relocs_ptr = 0;
312
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
313
objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
314
EXEC_OBJECT_PINNED |
315
EXEC_OBJECT_WRITE;
316
objects[0].offset = data_bo_addr;
317
#else
318
objects[0].flags = EXEC_OBJECT_WRITE;
319
objects[0].offset = -1;
320
#endif
321
322
objects[1].handle = batch_bo_handle;
323
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
324
objects[1].relocation_count = 0;
325
objects[1].relocs_ptr = 0;
326
objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
327
EXEC_OBJECT_PINNED;
328
objects[1].offset = batch_bo_addr;
329
#else
330
objects[1].relocation_count = relocs.size();
331
objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
332
objects[1].flags = 0;
333
objects[1].offset = -1;
334
#endif
335
336
drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
337
execbuf.buffers_ptr = (uintptr_t)(void *)objects;
338
execbuf.buffer_count = 2;
339
execbuf.batch_start_offset = 0;
340
execbuf.batch_len = batch_offset;
341
execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
342
execbuf.rsvd1 = ctx_id;
343
344
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
345
(void *)&execbuf), 0) << strerror(errno);
346
347
drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
348
gem_wait.bo_handle = batch_bo_handle;
349
gem_wait.timeout_ns = INT64_MAX;
350
ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
351
(void *)&gem_wait), 0) << strerror(errno);
352
}
353
354
uint64_t
355
__gen_combine_address(mi_builder_test *test, void *location,
356
address addr, uint32_t delta)
357
{
358
#if GFX_VER >= 8
359
uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
360
test->data_bo_addr : test->batch_bo_addr;
361
return addr_u64 + addr.offset + delta;
362
#else
363
drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
364
reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
365
reloc.delta = addr.offset + delta;
366
reloc.offset = (char *)location - (char *)test->batch_map;
367
reloc.presumed_offset = -1;
368
test->relocs.push_back(reloc);
369
370
return reloc.delta;
371
#endif
372
}
373
374
void *
375
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
376
{
377
return test->emit_dwords(num_dwords);
378
}
379
380
struct address
381
__gen_get_batch_address(mi_builder_test *test, void *location)
382
{
383
assert(location >= test->batch_map);
384
size_t offset = (char *)location - (char *)test->batch_map;
385
assert(offset < BATCH_BO_SIZE);
386
assert(offset <= UINT32_MAX);
387
388
return (struct address) {
389
.gem_handle = test->batch_bo_handle,
390
.offset = (uint32_t)offset,
391
};
392
}
393
394
#include "genxml/genX_pack.h"
395
#include "mi_builder.h"
396
397
/* Store a 64-bit immediate to memory, once as 64 bits and once as 32 bits. */
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t imm = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(imm));
   mi_store(&b, out_mem32(8), mi_imm(imm));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   const uint32_t *out_u32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out_u64[0], imm);

   // 64 -> 32: low dword written, the following dword still holds the canary
   EXPECT_EQ(out_u32[2], (uint32_t)imm);
   EXPECT_EQ(out_u32[3], (uint32_t)canary);
}
413
414
/* mem -> mem copies are only supported on HSW+ */
415
#if GFX_VERx10 >= 75
416
/* Memory-to-memory copies for every combination of 32/64-bit source and
 * destination widths.
 */
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t pattern = 0x0123456789abcdef;
   *(uint64_t *)input = pattern;

   mi_store(&b, out_mem64(0), in_mem64(0));   // 64 -> 64
   mi_store(&b, out_mem32(8), in_mem64(0));   // 64 -> 32
   mi_store(&b, out_mem32(16), in_mem32(0));  // 32 -> 32
   mi_store(&b, out_mem64(24), in_mem32(0));  // 32 -> 64

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   const uint32_t *out_u32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out_u64[0], pattern);

   // 64 -> 32
   EXPECT_EQ(out_u32[2], (uint32_t)pattern);
   EXPECT_EQ(out_u32[3], (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(out_u32[4], (uint32_t)pattern);
   EXPECT_EQ(out_u32[5], (uint32_t)canary);

   // 32 -> 64: upper dword must be zero
   EXPECT_EQ(out_u64[3], (uint64_t)(uint32_t)pattern);
}
442
#endif
443
444
/* Load an immediate into the scratch register as 64 and as 32 bits.  The
 * register is pre-filled with the canary each time so that a 32-bit write can
 * be seen to leave the upper half alone.
 */
TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t imm = 0x0123456789abcdef;

   // 64-bit write of the immediate
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(imm));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   // 32-bit write of the immediate
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(imm));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   const uint32_t *out_u32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out_u64[0], imm);

   // 64 -> 32
   EXPECT_EQ(out_u32[2], (uint32_t)imm);
   EXPECT_EQ(out_u32[3], (uint32_t)canary);
}
465
466
/* Load memory into the scratch register for every 32/64-bit width
 * combination.  The register is reset to the canary before each load and then
 * dumped as a full 64-bit value.
 */
TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t pattern = 0x0123456789abcdef;
   *(uint64_t *)input = pattern;

   // 64 -> 64
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   // 64 -> 32
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   // 32 -> 32
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   // 32 -> 64
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   const uint32_t *out_u32 = (const uint32_t *)output;

   // 64 -> 64
   EXPECT_EQ(out_u64[0], pattern);

   // 64 -> 32
   EXPECT_EQ(out_u32[2], (uint32_t)pattern);
   EXPECT_EQ(out_u32[3], (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(out_u32[4], (uint32_t)pattern);
   EXPECT_EQ(out_u32[5], (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(out_u64[3], (uint64_t)(uint32_t)pattern);
}
503
504
/* mi_memset() must fill every dword of the requested range. */
TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   uint32_t *dwords = (uint32_t *)output;
   const size_t num_dwords = memset_size / sizeof(*dwords);
   for (unsigned idx = 0; idx < num_dwords; idx++)
      EXPECT_EQ(dwords[idx], 0xdeadbeef);
}
516
517
/* mi_memcpy() must copy a byte ramp from the input to the output region. */
TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   // Source pattern: byte n holds the value n.
   uint8_t *src_bytes = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      src_bytes[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *dst_bytes = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(dst_bytes[i], i);
}
533
534
/* Start of MI_MATH section */
535
#if GFX_VERx10 >= 75
536
537
/* Expect x to equal the 64-bit value held by the mi_value `imm`.  The tests
 * below build `imm` by running the same mi_* operation on mi_imm() operands,
 * presumably so the builder folds it to a constant -- confirm in mi_builder.h.
 */
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
538
539
/* Bitwise NOT of 32- and 64-bit memory operands, stored back as both 64- and
 * 32-bit results, including a double inversion.
 */
TEST_F(mi_builder_test, inot)
{
   const uint64_t src = 0x0123456789abcdef;
   const uint32_t src_lo = (uint32_t)src;
   const uint32_t src_hi = (uint32_t)(src >> 32);
   memcpy(input, &src, sizeof(src));

   // 64-bit destinations
   mi_store(&b, out_mem64(0), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8), mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));

   // 32-bit destinations
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   const uint32_t *out_u32 = (const uint32_t *)output;

   EXPECT_EQ(out_u64[0], ~src);
   EXPECT_EQ(out_u64[1], src);
   EXPECT_EQ(out_u64[2], ~(uint64_t)src_lo);
   EXPECT_EQ(out_u64[3], ~(uint64_t)src_hi);
   EXPECT_EQ(out_u32[8], (uint32_t)~src);
   EXPECT_EQ(out_u32[9], (uint32_t)~src_lo);
   EXPECT_EQ(out_u32[10], (uint32_t)src_lo);
   EXPECT_EQ(out_u32[11], (uint32_t)~src_hi);
}
566
567
/* Test adding of immediates of all kinds including
568
*
569
* - All zeroes
570
* - All ones
571
* - inverted constants
572
*/
573
TEST_F(mi_builder_test, add_imm)
574
{
575
const uint64_t value = 0x0123456789abcdef;
576
const uint64_t add = 0xdeadbeefac0ffee2;
577
memcpy(input, &value, sizeof(value));
578
579
mi_store(&b, out_mem64(0),
580
mi_iadd(&b, in_mem64(0), mi_imm(0)));
581
mi_store(&b, out_mem64(8),
582
mi_iadd(&b, in_mem64(0), mi_imm(-1)));
583
mi_store(&b, out_mem64(16),
584
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
585
mi_store(&b, out_mem64(24),
586
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
587
mi_store(&b, out_mem64(32),
588
mi_iadd(&b, in_mem64(0), mi_imm(add)));
589
mi_store(&b, out_mem64(40),
590
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
591
mi_store(&b, out_mem64(48),
592
mi_iadd(&b, mi_imm(0), in_mem64(0)));
593
mi_store(&b, out_mem64(56),
594
mi_iadd(&b, mi_imm(-1), in_mem64(0)));
595
mi_store(&b, out_mem64(64),
596
mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
597
mi_store(&b, out_mem64(72),
598
mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
599
mi_store(&b, out_mem64(80),
600
mi_iadd(&b, mi_imm(add), in_mem64(0)));
601
mi_store(&b, out_mem64(88),
602
mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));
603
604
// And som add_imm just for good measure
605
mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
606
mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));
607
608
submit_batch();
609
610
EXPECT_EQ(*(uint64_t *)(output + 0), value);
611
EXPECT_EQ(*(uint64_t *)(output + 8), value - 1);
612
EXPECT_EQ(*(uint64_t *)(output + 16), value - 1);
613
EXPECT_EQ(*(uint64_t *)(output + 24), value);
614
EXPECT_EQ(*(uint64_t *)(output + 32), value + add);
615
EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add);
616
EXPECT_EQ(*(uint64_t *)(output + 48), value);
617
EXPECT_EQ(*(uint64_t *)(output + 56), value - 1);
618
EXPECT_EQ(*(uint64_t *)(output + 64), value - 1);
619
EXPECT_EQ(*(uint64_t *)(output + 72), value);
620
EXPECT_EQ(*(uint64_t *)(output + 80), value + add);
621
EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add);
622
EXPECT_EQ(*(uint64_t *)(output + 96), value);
623
EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
624
}
625
626
/* Unsigned <, unsigned >=, == and != for every ordered pair of test values.
 * Pair (i, j) writes its four 64-bit results at byte offset i * 256 + j * 32.
 */
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned lhs = 0; lhs < ARRAY_SIZE(values); lhs++) {
      for (unsigned rhs = 0; rhs < ARRAY_SIZE(values); rhs++) {
         const unsigned base = lhs * 256 + rhs * 32;

         mi_store(&b, out_mem64(base + 0),
                  mi_ult(&b, in_mem64(lhs * 8), in_mem64(rhs * 8)));
         mi_store(&b, out_mem64(base + 8),
                  mi_uge(&b, in_mem64(lhs * 8), in_mem64(rhs * 8)));
         mi_store(&b, out_mem64(base + 16),
                  mi_ieq(&b, in_mem64(lhs * 8), in_mem64(rhs * 8)));
         mi_store(&b, out_mem64(base + 24),
                  mi_ine(&b, in_mem64(lhs * 8), in_mem64(rhs * 8)));
      }
   }

   submit_batch();

   for (unsigned lhs = 0; lhs < ARRAY_SIZE(values); lhs++) {
      for (unsigned rhs = 0; rhs < ARRAY_SIZE(values); rhs++) {
         uint64_t *results = (uint64_t *)(output + lhs * 256 + rhs * 32);
         const uint64_t a = values[lhs];
         const uint64_t c = values[rhs];

         EXPECT_EQ_IMM(results[0], mi_ult(&b, mi_imm(a), mi_imm(c)));
         EXPECT_EQ_IMM(results[1], mi_uge(&b, mi_imm(a), mi_imm(c)));
         EXPECT_EQ_IMM(results[2], mi_ieq(&b, mi_imm(a), mi_imm(c)));
         EXPECT_EQ_IMM(results[3], mi_ine(&b, mi_imm(a), mi_imm(c)));
      }
   }
}
669
670
/* Zero / non-zero tests on 64-bit values, including values that are only
 * non-zero in the low dword, only in the high dword, and in both.
 */
TEST_F(mi_builder_test, z_nz)
{
   /* The remaining three entries are value-initialised to zero. */
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      /* Widen before adding: UINT32_MAX has a 32-bit unsigned type, so a
       * plain "UINT32_MAX + 1" wraps to 0 and never exercises a value whose
       * only set bit is in the high dword.
       */
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}
694
695
/* Bitwise AND of two 64-bit memory operands. */
TEST_F(mi_builder_test, iand)
{
   const uint64_t operands[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, operands, sizeof(operands));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ_IMM(out_u64[0], mi_iand(&b, mi_imm(operands[0]),
                                         mi_imm(operands[1])));
}
710
711
#if GFX_VERx10 >= 125
712
/* Left shift where the shift amount is itself read from memory. */
TEST_F(mi_builder_test, ishl)
{
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   // Shift amounts live right after the 8-byte source value.
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      mi_store(&b, out_mem64(s * 8),
               mi_ishl(&b, in_mem64(0), in_mem32(8 + s * 4)));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      EXPECT_EQ_IMM(out_u64[s],
                    mi_ishl(&b, mi_imm(src), mi_imm(shifts[s])));
   }
}
732
733
/* Unsigned right shift where the shift amount is read from memory. */
TEST_F(mi_builder_test, ushr)
{
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   // Shift amounts live right after the 8-byte source value.
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      mi_store(&b, out_mem64(s * 8),
               mi_ushr(&b, in_mem64(0), in_mem32(8 + s * 4)));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
      EXPECT_EQ_IMM(out_u64[s],
                    mi_ushr(&b, mi_imm(src), mi_imm(shifts[s])));
   }
}
753
754
/* Unsigned right shift by every immediate amount from 0 to 64 inclusive. */
TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8),
               mi_ushr_imm(&b, in_mem64(0), shift));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(out_u64[shift],
                    mi_ushr_imm(&b, mi_imm(src), shift));
   }
}
771
772
/* Arithmetic right shift of a positive and a negative 64-bit value, with the
 * shift amount read from memory.  Result (i, j) lands at byte offset
 * i * 8 + j * 16.
 */
TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   // Shift amounts follow the two 8-byte source values.
   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned v = 0; v < ARRAY_SIZE(values); v++) {
      for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
         mi_store(&b, out_mem64(v * 8 + s * 16),
                  mi_ishr(&b, in_mem64(v * 8), in_mem32(16 + s * 4)));
      }
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned v = 0; v < ARRAY_SIZE(values); v++) {
      for (unsigned s = 0; s < ARRAY_SIZE(shifts); s++) {
         EXPECT_EQ_IMM(out_u64[v + s * 2],
                       mi_ishr(&b, mi_imm(values[v]), mi_imm(shifts[s])));
      }
   }
}
799
800
/* Arithmetic right shift by every immediate amount from 0 to 64 inclusive. */
TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8),
               mi_ishr_imm(&b, in_mem64(0), shift));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(out_u64[shift],
                    mi_ishr_imm(&b, mi_imm(src), shift));
   }
}
817
#endif /* if GFX_VERx10 >= 125 */
818
819
/* Multiply two 64-bit memory values by a table of 32-bit immediates.
 * Result (i, j) lands at byte offset i * 160 + j * 8.
 */
TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1,      2,      3,      5,
      10800,  193,    64,     40,
      3796,   256,    88,     473,
      1421,   706,    175,    850,
      39,     38985,  1941,   17,
   };

   for (unsigned l = 0; l < ARRAY_SIZE(lhs); l++) {
      for (unsigned r = 0; r < ARRAY_SIZE(rhs); r++) {
         mi_store(&b, out_mem64(l * 160 + r * 8),
                  mi_imul_imm(&b, in_mem64(l * 8), rhs[r]));
      }
   }

   submit_batch();

   for (unsigned l = 0; l < ARRAY_SIZE(lhs); l++) {
      // 160 bytes per row == 20 qwords
      const uint64_t *row = (const uint64_t *)(output + l * 160);
      for (unsigned r = 0; r < ARRAY_SIZE(rhs); r++) {
         EXPECT_EQ_IMM(row[r],
                       mi_imul_imm(&b, mi_imm(lhs[l]), rhs[r]));
      }
   }
}
855
856
/* Left shift by every immediate amount from 0 to 64 inclusive. */
TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8),
               mi_ishl_imm(&b, in_mem64(0), shift));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(out_u64[shift],
                    mi_ishl_imm(&b, mi_imm(src), shift));
   }
}
873
874
/* mi_ushr32_imm() with every immediate amount from 0 to 64 inclusive. */
TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8),
               mi_ushr32_imm(&b, in_mem64(0), shift));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++) {
      EXPECT_EQ_IMM(out_u64[shift],
                    mi_ushr32_imm(&b, mi_imm(src), shift));
   }
}
891
892
/* Divide every table entry (read from memory) by every table entry (as an
 * immediate).  Result (i, j) is a dword at byte offset i * 80 + j * 4.
 */
TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1,      2,      3,      5,
      10800,  193,    64,     40,
      3796,   256,    88,     473,
      1421,   706,    175,    850,
      39,     38985,  1941,   17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned n = 0; n < ARRAY_SIZE(values); n++) {
      for (unsigned d = 0; d < ARRAY_SIZE(values); d++) {
         mi_store(&b, out_mem32(n * 80 + d * 4),
                  mi_udiv32_imm(&b, in_mem32(n * 4), values[d]));
      }
   }

   submit_batch();

   for (unsigned n = 0; n < ARRAY_SIZE(values); n++) {
      // 80 bytes per row == 20 dwords
      uint32_t *row = (uint32_t *)(output + n * 80);
      for (unsigned d = 0; d < ARRAY_SIZE(values); d++) {
         EXPECT_EQ_IMM(row[d],
                       mi_udiv32_imm(&b, mi_imm(values[n]), values[d]));
      }
   }
}
923
924
/* Predicated stores: writes issued while the predicate is true must land,
 * writes issued while it is false must not.
 */
TEST_F(mi_builder_test, store_if)
{
   uint64_t kept64 = 0xb453b411deadc0deull;
   uint32_t kept32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(kept64));
   mi_store_if(&b, out_mem32(8), mi_imm(kept32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   const uint32_t *out_u32 = (const uint32_t *)output;

   EXPECT_EQ(out_u64[0], kept64);
   EXPECT_EQ(out_u32[2], kept32);
   EXPECT_EQ(out_u32[3], (uint32_t)canary);
}
955
956
#endif /* GFX_VERx10 >= 75 */
957
958
#if GFX_VERx10 >= 125
959
960
/*
961
* Indirect load/store tests. Only available on XE_HP+
962
*/
963
964
/* Indirect 64-bit loads: the byte offset applied to the base address is
 * itself read from memory.
 */
TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   // A permutation of the eight qword byte offsets, stored after the values.
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++) {
      mi_store(&b, out_mem64(k * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(k * 4 + 64)));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++)
      EXPECT_EQ(out_u64[k], values[offsets[k] / 8]);
}
991
992
/* Indirect 64-bit stores: value k is written at the byte offset read from
 * offsets[k] in memory.
 */
TEST_F(mi_builder_test, store_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   // A permutation of the eight qword byte offsets, stored after the values.
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(k * 4 + 64),
                            in_mem64(k * 8));
   }

   submit_batch();

   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++) {
      const uint64_t *landed = (const uint64_t *)(output + offsets[k]);
      EXPECT_EQ(*landed, values[k]);
   }
}
1019
1020
/*
1021
* Control-flow tests. Only available on XE_HP+
1022
*/
1023
1024
/* An unconditional goto must jump over the commands between it and its
 * target.
 */
TEST_F(mi_builder_test, goto)
{
   const uint64_t kept = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(kept));

   struct mi_goto_target skip = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &skip);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &skip);

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ(out_u64[0], kept);
}
1042
1043
/* Register the control-flow tests read through mi_reg32() right after an
 * MI_PREDICATE command.  The offset presumably matches the hardware
 * MI_PREDICATE_RESULT register -- confirm against the register spec.
 */
#define MI_PREDICATE_RESULT 0x2418
1044
1045
/* Conditional goto: with a false predicate the jump is not taken and the
 * next store runs; with a true predicate the jump is taken and the store
 * after it is skipped.  Both gotos share one target.
 */
TEST_F(mi_builder_test, goto_if)
{
   const uint64_t stages[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(stages[0]));

   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target done = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &done);

   mi_store(&b, out_mem64(0), mi_imm(stages[1]));

   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &done);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(stages[2]));

   mi_goto_target(&b, &done);

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ(out_u64[0], stages[1]);
}
1083
1084
/* A counting loop: increment a counter in memory until it reaches
 * loop_count, then break.
 */
TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t loop_count = 8;

   // counter = 0
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      // if (counter >= loop_count) break;
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      // counter++
      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ(out_u64[0], loop_count);
}
1100
1101
/* mi_break_if() with a zero condition must fall through, and mi_break() must
 * leave the loop before the final store.
 */
TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      // Condition is zero: must not break here.
      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      // Unconditional exit; the store below must not run.
      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ(out_u64[0], 2);
}
1119
1120
/* mi_continue() must restart the loop, so the store placed after it never
 * executes while the counter still reaches loop_count.
 */
TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t loop_count = 8;

   // counter (qword 0) and marker (qword 1) both start at zero
   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      // Unreachable if continue works.
      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ(out_u64[0], loop_count);
   EXPECT_EQ(out_u64[1], 5);
}
1143
1144
/* Conditional continue: with a false predicate execution falls through to
 * the next store; with a true predicate the loop restarts and the final
 * store is skipped.
 */
TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   // counter (qword 0) and marker (qword 1) both start at zero
   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      // Predicate false: the continue below must not be taken.
      emit_cmd(GENX(MI_PREDICATE), pred) {
         pred.LoadOperation = LOAD_LOAD;
         pred.CombineOperation = COMBINE_SET;
         pred.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      // Predicate true: the continue below must be taken.
      emit_cmd(GENX(MI_PREDICATE), pred) {
         pred.LoadOperation = LOAD_LOAD;
         pred.CombineOperation = COMBINE_SET;
         pred.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      // Skipped by the taken continue.
      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   const uint64_t *out_u64 = (const uint64_t *)output;
   EXPECT_EQ(out_u64[0], loop_count);
   EXPECT_EQ(out_u64[1], 10);
}
1183
#endif /* GFX_VERx10 >= 125 */
1184
1185