Path: blob/21.2-virgl/src/intel/common/tests/mi_builder_test.cpp
4547 views
/*1* Copyright © 2019 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <fcntl.h>24#include <string.h>25#include <xf86drm.h>2627#include <gtest/gtest.h>2829#include "dev/intel_device_info.h"30#include "drm-uapi/i915_drm.h"31#include "genxml/gen_macros.h"32#include "util/macros.h"3334class mi_builder_test;3536struct address {37uint32_t gem_handle;38uint32_t offset;39};4041#define __gen_address_type struct address42#define __gen_user_data ::mi_builder_test4344uint64_t __gen_combine_address(mi_builder_test *test, void *location,45struct address addr, uint32_t delta);46void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);47struct address __gen_get_batch_address(mi_builder_test *test,48void *location);4950struct address51__gen_address_offset(address addr, uint64_t offset)52{53addr.offset += offset;54return addr;55}5657#if GFX_VERx10 >= 7558#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */59#else60#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */61#endif62#define MI_BUILDER_NUM_ALLOC_GPRS 1563#define INPUT_DATA_OFFSET 064#define OUTPUT_DATA_OFFSET 20486566#define __genxml_cmd_length(cmd) cmd ## _length67#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias68#define __genxml_cmd_header(cmd) cmd ## _header69#define __genxml_cmd_pack(cmd) cmd ## _pack7071#include "genxml/genX_pack.h"72#include "mi_builder.h"7374#define emit_cmd(cmd, name) \75for (struct cmd name = { __genxml_cmd_header(cmd) }, \76*_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \77__builtin_expect(_dst != NULL, 1); \78__genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)7980#include <vector>8182class mi_builder_test : public ::testing::Test {83public:84mi_builder_test();85~mi_builder_test();8687void SetUp();8889void *emit_dwords(int num_dwords);90void submit_batch();9192inline address in_addr(uint32_t offset)93{94address addr;95addr.gem_handle = data_bo_handle;96addr.offset = INPUT_DATA_OFFSET + offset;97return addr;98}99100inline address out_addr(uint32_t offset)101{102address addr;103addr.gem_handle = data_bo_handle;104addr.offset = OUTPUT_DATA_OFFSET + offset;105return addr;106}107108inline mi_value in_mem64(uint32_t offset)109{110return mi_mem64(in_addr(offset));111}112113inline mi_value in_mem32(uint32_t offset)114{115return mi_mem32(in_addr(offset));116}117118inline mi_value out_mem64(uint32_t offset)119{120return mi_mem64(out_addr(offset));121}122123inline mi_value out_mem32(uint32_t offset)124{125return mi_mem32(out_addr(offset));126}127128int fd;129int ctx_id;130intel_device_info devinfo;131132uint32_t batch_bo_handle;133#if GFX_VER >= 8134uint64_t batch_bo_addr;135#endif136uint32_t batch_offset;137void *batch_map;138139#if GFX_VER < 8140std::vector<drm_i915_gem_relocation_entry> relocs;141#endif142143uint32_t data_bo_handle;144#if GFX_VER >= 8145uint64_t data_bo_addr;146#endif147void *data_map;148char *input;149char *output;150uint64_t canary;151152mi_builder b;153};154155mi_builder_test::mi_builder_test() :156fd(-1)157{ }158159mi_builder_test::~mi_builder_test()160{161close(fd);162}163164// 1 MB of batch should be enough for anyone, right?165#define BATCH_BO_SIZE (256 * 4096)166#define DATA_BO_SIZE 4096167168void169mi_builder_test::SetUp()170{171drmDevicePtr devices[8];172int max_devices = drmGetDevices2(0, devices, 8);173174int i;175for (i = 0; i < max_devices; i++) {176if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&177devices[i]->bustype == DRM_BUS_PCI &&178devices[i]->deviceinfo.pci->vendor_id == 0x8086) {179fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);180if (fd < 0)181continue;182183/* We don't really need to do this when running on hardware because184* we can just pull it from the drmDevice. However, without doing185* this, intel_dump_gpu gets a bit of heartburn and we can't use the186* --device option with it.187*/188int device_id;189drm_i915_getparam getparam = drm_i915_getparam();190getparam.param = I915_PARAM_CHIPSET_ID;191getparam.value = &device_id;192ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,193(void *)&getparam), 0) << strerror(errno);194195ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo));196if (devinfo.ver != GFX_VER || devinfo.is_haswell != (GFX_VERx10 == 75)) {197close(fd);198fd = -1;199continue;200}201202203/* Found a device! */204break;205}206}207ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";208209drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create();210ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE,211(void *)&ctx_create), 0) << strerror(errno);212ctx_id = ctx_create.ctx_id;213214if (GFX_VER >= 8) {215/* On gfx8+, we require softpin */216int has_softpin;217drm_i915_getparam getparam = drm_i915_getparam();218getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;219getparam.value = &has_softpin;220ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,221(void *)&getparam), 0) << strerror(errno);222ASSERT_TRUE(has_softpin);223}224225// Create the batch buffer226drm_i915_gem_create gem_create = drm_i915_gem_create();227gem_create.size = BATCH_BO_SIZE;228ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,229(void *)&gem_create), 0) << strerror(errno);230batch_bo_handle = gem_create.handle;231#if GFX_VER >= 8232batch_bo_addr = 0xffffffffdff70000ULL;233#endif234235drm_i915_gem_caching gem_caching = drm_i915_gem_caching();236gem_caching.handle = batch_bo_handle;237gem_caching.caching = I915_CACHING_CACHED;238ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,239(void *)&gem_caching), 0) << strerror(errno);240241drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();242gem_mmap.handle = batch_bo_handle;243gem_mmap.offset = 0;244gem_mmap.size = BATCH_BO_SIZE;245gem_mmap.flags = 0;246ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,247(void *)&gem_mmap), 0) << strerror(errno);248batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;249250// Start the batch at zero251batch_offset = 0;252253// Create the data buffer254gem_create = drm_i915_gem_create();255gem_create.size = DATA_BO_SIZE;256ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,257(void *)&gem_create), 0) << strerror(errno);258data_bo_handle = gem_create.handle;259#if GFX_VER >= 8260data_bo_addr = 0xffffffffefff0000ULL;261#endif262263gem_caching = drm_i915_gem_caching();264gem_caching.handle = data_bo_handle;265gem_caching.caching = I915_CACHING_CACHED;266ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,267(void *)&gem_caching), 0) << strerror(errno);268269gem_mmap = drm_i915_gem_mmap();270gem_mmap.handle = data_bo_handle;271gem_mmap.offset = 0;272gem_mmap.size = DATA_BO_SIZE;273gem_mmap.flags = 0;274ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,275(void *)&gem_mmap), 0) << strerror(errno);276data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;277input = (char *)data_map + INPUT_DATA_OFFSET;278output = (char *)data_map + OUTPUT_DATA_OFFSET;279280// Fill the test data with garbage281memset(data_map, 139, DATA_BO_SIZE);282memset(&canary, 139, sizeof(canary));283284mi_builder_init(&b, &devinfo, this);285}286287void *288mi_builder_test::emit_dwords(int num_dwords)289{290void *ptr = (void *)((char *)batch_map + batch_offset);291batch_offset += num_dwords * 4;292assert(batch_offset < BATCH_BO_SIZE);293return ptr;294}295296void297mi_builder_test::submit_batch()298{299mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);300301// Round batch up to an even number of dwords.302if (batch_offset & 4)303mi_builder_emit(&b, GENX(MI_NOOP), noop);304305drm_i915_gem_exec_object2 objects[2];306memset(objects, 0, sizeof(objects));307308objects[0].handle = data_bo_handle;309objects[0].relocation_count = 0;310objects[0].relocs_ptr = 0;311#if GFX_VER >= 8 /* On gfx8+, we pin everything */312objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |313EXEC_OBJECT_PINNED |314EXEC_OBJECT_WRITE;315objects[0].offset = data_bo_addr;316#else317objects[0].flags = EXEC_OBJECT_WRITE;318objects[0].offset = -1;319#endif320321objects[1].handle = batch_bo_handle;322#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */323objects[1].relocation_count = 0;324objects[1].relocs_ptr = 0;325objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |326EXEC_OBJECT_PINNED;327objects[1].offset = batch_bo_addr;328#else329objects[1].relocation_count = relocs.size();330objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];331objects[1].flags = 0;332objects[1].offset = -1;333#endif334335drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();336execbuf.buffers_ptr = (uintptr_t)(void *)objects;337execbuf.buffer_count = 2;338execbuf.batch_start_offset = 0;339execbuf.batch_len = batch_offset;340execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;341execbuf.rsvd1 = ctx_id;342343ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,344(void *)&execbuf), 0) << strerror(errno);345346drm_i915_gem_wait gem_wait = drm_i915_gem_wait();347gem_wait.bo_handle = batch_bo_handle;348gem_wait.timeout_ns = INT64_MAX;349ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,350(void *)&gem_wait), 0) << strerror(errno);351}352353uint64_t354__gen_combine_address(mi_builder_test *test, void *location,355address addr, uint32_t delta)356{357#if GFX_VER >= 8358uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?359test->data_bo_addr : test->batch_bo_addr;360return addr_u64 + addr.offset + delta;361#else362drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();363reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;364reloc.delta = addr.offset + delta;365reloc.offset = (char *)location - (char *)test->batch_map;366reloc.presumed_offset = -1;367test->relocs.push_back(reloc);368369return reloc.delta;370#endif371}372373void *374__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)375{376return test->emit_dwords(num_dwords);377}378379struct address380__gen_get_batch_address(mi_builder_test *test, void *location)381{382assert(location >= test->batch_map);383size_t offset = (char *)location - (char *)test->batch_map;384assert(offset < BATCH_BO_SIZE);385assert(offset <= UINT32_MAX);386387return (struct address) {388.gem_handle = test->batch_bo_handle,389.offset = (uint32_t)offset,390};391}392393#include "genxml/genX_pack.h"394#include "mi_builder.h"395396TEST_F(mi_builder_test, imm_mem)397{398const uint64_t value = 0x0123456789abcdef;399400mi_store(&b, out_mem64(0), mi_imm(value));401mi_store(&b, out_mem32(8), mi_imm(value));402403submit_batch();404405// 64 -> 64406EXPECT_EQ(*(uint64_t *)(output + 0), value);407408// 64 -> 32409EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);410EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);411}412413/* mem -> mem copies are only supported on HSW+ */414#if GFX_VERx10 >= 75415TEST_F(mi_builder_test, mem_mem)416{417const uint64_t value = 0x0123456789abcdef;418*(uint64_t *)input = value;419420mi_store(&b, out_mem64(0), in_mem64(0));421mi_store(&b, out_mem32(8), in_mem64(0));422mi_store(&b, out_mem32(16), in_mem32(0));423mi_store(&b, out_mem64(24), in_mem32(0));424425submit_batch();426427// 64 -> 64428EXPECT_EQ(*(uint64_t *)(output + 0), value);429430// 64 -> 32431EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);432EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);433434// 32 -> 32435EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);436EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);437438// 32 -> 64439EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);440}441#endif442443TEST_F(mi_builder_test, imm_reg)444{445const uint64_t value = 0x0123456789abcdef;446447mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));448mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));449mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));450451mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));452mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));453mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));454455submit_batch();456457// 64 -> 64458EXPECT_EQ(*(uint64_t *)(output + 0), value);459460// 64 -> 32461EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);462EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);463}464465TEST_F(mi_builder_test, mem_reg)466{467const uint64_t value = 0x0123456789abcdef;468*(uint64_t *)input = value;469470mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));471mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));472mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));473474mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));475mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));476mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));477478mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));479mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));480mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));481482mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));483mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));484mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));485486submit_batch();487488// 64 -> 64489EXPECT_EQ(*(uint64_t *)(output + 0), value);490491// 64 -> 32492EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);493EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);494495// 32 -> 32496EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);497EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);498499// 32 -> 64500EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);501}502503TEST_F(mi_builder_test, memset)504{505const unsigned memset_size = 256;506507mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);508509submit_batch();510511uint32_t *out_u32 = (uint32_t *)output;512for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)513EXPECT_EQ(out_u32[i], 0xdeadbeef);514}515516TEST_F(mi_builder_test, memcpy)517{518const unsigned memcpy_size = 256;519520uint8_t *in_u8 = (uint8_t *)input;521for (unsigned i = 0; i < memcpy_size; i++)522in_u8[i] = i;523524mi_memcpy(&b, out_addr(0), in_addr(0), 256);525526submit_batch();527528uint8_t *out_u8 = (uint8_t *)output;529for (unsigned i = 0; i < memcpy_size; i++)530EXPECT_EQ(out_u8[i], i);531}532533/* Start of MI_MATH section */534#if GFX_VERx10 >= 75535536#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))537538TEST_F(mi_builder_test, inot)539{540const uint64_t value = 0x0123456789abcdef;541const uint32_t value_lo = (uint32_t)value;542const uint32_t value_hi = (uint32_t)(value >> 32);543memcpy(input, &value, sizeof(value));544545mi_store(&b, out_mem64(0), mi_inot(&b, in_mem64(0)));546mi_store(&b, out_mem64(8), mi_inot(&b, mi_inot(&b, in_mem64(0))));547mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));548mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));549mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));550mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));551mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));552mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));553554submit_batch();555556EXPECT_EQ(*(uint64_t *)(output + 0), ~value);557EXPECT_EQ(*(uint64_t *)(output + 8), value);558EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);559EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);560EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);561EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);562EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);563EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);564}565566/* Test adding of immediates of all kinds including567*568* - All zeroes569* - All ones570* - inverted constants571*/572TEST_F(mi_builder_test, add_imm)573{574const uint64_t value = 0x0123456789abcdef;575const uint64_t add = 0xdeadbeefac0ffee2;576memcpy(input, &value, sizeof(value));577578mi_store(&b, out_mem64(0),579mi_iadd(&b, in_mem64(0), mi_imm(0)));580mi_store(&b, out_mem64(8),581mi_iadd(&b, in_mem64(0), mi_imm(-1)));582mi_store(&b, out_mem64(16),583mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));584mi_store(&b, out_mem64(24),585mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));586mi_store(&b, out_mem64(32),587mi_iadd(&b, in_mem64(0), mi_imm(add)));588mi_store(&b, out_mem64(40),589mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));590mi_store(&b, out_mem64(48),591mi_iadd(&b, mi_imm(0), in_mem64(0)));592mi_store(&b, out_mem64(56),593mi_iadd(&b, mi_imm(-1), in_mem64(0)));594mi_store(&b, out_mem64(64),595mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));596mi_store(&b, out_mem64(72),597mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));598mi_store(&b, out_mem64(80),599mi_iadd(&b, mi_imm(add), in_mem64(0)));600mi_store(&b, out_mem64(88),601mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));602603// And som add_imm just for good measure604mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));605mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));606607submit_batch();608609EXPECT_EQ(*(uint64_t *)(output + 0), value);610EXPECT_EQ(*(uint64_t *)(output + 8), value - 1);611EXPECT_EQ(*(uint64_t *)(output + 16), value - 1);612EXPECT_EQ(*(uint64_t *)(output + 24), value);613EXPECT_EQ(*(uint64_t *)(output + 32), value + add);614EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add);615EXPECT_EQ(*(uint64_t *)(output + 48), value);616EXPECT_EQ(*(uint64_t *)(output + 56), value - 1);617EXPECT_EQ(*(uint64_t *)(output + 64), value - 1);618EXPECT_EQ(*(uint64_t *)(output + 72), value);619EXPECT_EQ(*(uint64_t *)(output + 80), value + add);620EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add);621EXPECT_EQ(*(uint64_t *)(output + 96), value);622EXPECT_EQ(*(uint64_t *)(output + 104), value + add);623}624625TEST_F(mi_builder_test, ult_uge_ieq_ine)626{627uint64_t values[8] = {6280x0123456789abcdef,6290xdeadbeefac0ffee2,630(uint64_t)-1,6311,6320,6331049571,634(uint64_t)-240058,63520204184,636};637memcpy(input, values, sizeof(values));638639for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {640for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {641mi_store(&b, out_mem64(i * 256 + j * 32 + 0),642mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));643mi_store(&b, out_mem64(i * 256 + j * 32 + 8),644mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));645mi_store(&b, out_mem64(i * 256 + j * 32 + 16),646mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));647mi_store(&b, out_mem64(i * 256 + j * 32 + 24),648mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));649}650}651652submit_batch();653654for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {655for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {656uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);657EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),658mi_imm(values[j])));659EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),660mi_imm(values[j])));661EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),662mi_imm(values[j])));663EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),664mi_imm(values[j])));665}666}667}668669TEST_F(mi_builder_test, z_nz)670{671uint64_t values[8] = {6720,6731,674UINT32_MAX,675UINT32_MAX + 1,676UINT64_MAX,677};678memcpy(input, values, sizeof(values));679680for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {681mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));682mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));683}684685submit_batch();686687for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {688uint64_t *out_u64 = (uint64_t *)(output + i * 16);689EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));690EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));691}692}693694TEST_F(mi_builder_test, iand)695{696const uint64_t values[2] = {6970x0123456789abcdef,6980xdeadbeefac0ffee2,699};700memcpy(input, values, sizeof(values));701702mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));703704submit_batch();705706EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),707mi_imm(values[1])));708}709710#if GFX_VERx10 >= 125711TEST_F(mi_builder_test, ishl)712{713const uint64_t value = 0x0123456789abcdef;714memcpy(input, &value, sizeof(value));715716uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };717memcpy(input + 8, shifts, sizeof(shifts));718719for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {720mi_store(&b, out_mem64(i * 8),721mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));722}723724submit_batch();725726for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {727EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),728mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));729}730}731732TEST_F(mi_builder_test, ushr)733{734const uint64_t value = 0x0123456789abcdef;735memcpy(input, &value, sizeof(value));736737uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };738memcpy(input + 8, shifts, sizeof(shifts));739740for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {741mi_store(&b, out_mem64(i * 8),742mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));743}744745submit_batch();746747for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {748EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),749mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));750}751}752753TEST_F(mi_builder_test, ushr_imm)754{755const uint64_t value = 0x0123456789abcdef;756memcpy(input, &value, sizeof(value));757758const unsigned max_shift = 64;759760for (unsigned i = 0; i <= max_shift; i++)761mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));762763submit_batch();764765for (unsigned i = 0; i <= max_shift; i++) {766EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),767mi_ushr_imm(&b, mi_imm(value), i));768}769}770771TEST_F(mi_builder_test, ishr)772{773const uint64_t values[] = {7740x0123456789abcdef,7750xfedcba9876543210,776};777memcpy(input, values, sizeof(values));778779uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };780memcpy(input + 16, shifts, sizeof(shifts));781782for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {783for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {784mi_store(&b, out_mem64(i * 8 + j * 16),785mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));786}787}788789submit_batch();790791for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {792for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {793EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),794mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));795}796}797}798799TEST_F(mi_builder_test, ishr_imm)800{801const uint64_t value = 0x0123456789abcdef;802memcpy(input, &value, sizeof(value));803804const unsigned max_shift = 64;805806for (unsigned i = 0; i <= max_shift; i++)807mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));808809submit_batch();810811for (unsigned i = 0; i <= max_shift; i++) {812EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),813mi_ishr_imm(&b, mi_imm(value), i));814}815}816#endif /* if GFX_VERx10 >= 125 */817818TEST_F(mi_builder_test, imul_imm)819{820uint64_t lhs[2] = {8210x0123456789abcdef,8220xdeadbeefac0ffee2,823};824memcpy(input, lhs, sizeof(lhs));825826/* Some random 32-bit unsigned integers. The first four have been827* hand-chosen just to ensure some good low integers; the rest were828* generated with a python script.829*/830uint32_t rhs[20] = {8311, 2, 3, 5,83210800, 193, 64, 40,8333796, 256, 88, 473,8341421, 706, 175, 850,83539, 38985, 1941, 17,836};837838for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {839for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {840mi_store(&b, out_mem64(i * 160 + j * 8),841mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));842}843}844845submit_batch();846847for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {848for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {849EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),850mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));851}852}853}854855TEST_F(mi_builder_test, ishl_imm)856{857const uint64_t value = 0x0123456789abcdef;858memcpy(input, &value, sizeof(value));859860const unsigned max_shift = 64;861862for (unsigned i = 0; i <= max_shift; i++)863mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));864865submit_batch();866867for (unsigned i = 0; i <= max_shift; i++) {868EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),869mi_ishl_imm(&b, mi_imm(value), i));870}871}872873TEST_F(mi_builder_test, ushr32_imm)874{875const uint64_t value = 0x0123456789abcdef;876memcpy(input, &value, sizeof(value));877878const unsigned max_shift = 64;879880for (unsigned i = 0; i <= max_shift; i++)881mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));882883submit_batch();884885for (unsigned i = 0; i <= max_shift; i++) {886EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),887mi_ushr32_imm(&b, mi_imm(value), i));888}889}890891TEST_F(mi_builder_test, udiv32_imm)892{893/* Some random 32-bit unsigned integers. The first four have been894* hand-chosen just to ensure some good low integers; the rest were895* generated with a python script.896*/897uint32_t values[20] = {8981, 2, 3, 5,89910800, 193, 64, 40,9003796, 256, 88, 473,9011421, 706, 175, 850,90239, 38985, 1941, 17,903};904memcpy(input, values, sizeof(values));905906for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {907for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {908mi_store(&b, out_mem32(i * 80 + j * 4),909mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));910}911}912913submit_batch();914915for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {916for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {917EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),918mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));919}920}921}922923TEST_F(mi_builder_test, store_if)924{925uint64_t u64 = 0xb453b411deadc0deull;926uint32_t u32 = 0x1337d00d;927928/* Write values with the predicate enabled */929emit_cmd(GENX(MI_PREDICATE), mip) {930mip.LoadOperation = LOAD_LOAD;931mip.CombineOperation = COMBINE_SET;932mip.CompareOperation = COMPARE_TRUE;933}934935mi_store_if(&b, out_mem64(0), mi_imm(u64));936mi_store_if(&b, out_mem32(8), mi_imm(u32));937938/* Set predicate to false, write garbage that shouldn't land */939emit_cmd(GENX(MI_PREDICATE), mip) {940mip.LoadOperation = LOAD_LOAD;941mip.CombineOperation = COMBINE_SET;942mip.CompareOperation = COMPARE_FALSE;943}944945mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));946mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));947948submit_batch();949950EXPECT_EQ(*(uint64_t *)(output + 0), u64);951EXPECT_EQ(*(uint32_t *)(output + 8), u32);952EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);953}954955#endif /* GFX_VERx10 >= 75 */956957#if GFX_VERx10 >= 125958959/*960* Indirect load/store tests. Only available on XE_HP+961*/962963TEST_F(mi_builder_test, load_mem64_offset)964{965uint64_t values[8] = {9660x0123456789abcdef,9670xdeadbeefac0ffee2,968(uint64_t)-1,9691,9700,9711049571,972(uint64_t)-240058,97320204184,974};975memcpy(input, values, sizeof(values));976977uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };978memcpy(input + 64, offsets, sizeof(offsets));979980for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {981mi_store(&b, out_mem64(i * 8),982mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));983}984985submit_batch();986987for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)988EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);989}990991TEST_F(mi_builder_test, store_mem64_offset)992{993uint64_t values[8] = {9940x0123456789abcdef,9950xdeadbeefac0ffee2,996(uint64_t)-1,9971,9980,9991049571,1000(uint64_t)-240058,100120204184,1002};1003memcpy(input, values, sizeof(values));10041005uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };1006memcpy(input + 64, offsets, sizeof(offsets));10071008for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {1009mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),1010in_mem64(i * 8));1011}10121013submit_batch();10141015for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)1016EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);1017}10181019/*1020* Control-flow tests. Only available on XE_HP+1021*/10221023TEST_F(mi_builder_test, goto)1024{1025const uint64_t value = 0xb453b411deadc0deull;10261027mi_store(&b, out_mem64(0), mi_imm(value));10281029struct mi_goto_target t = MI_GOTO_TARGET_INIT;1030mi_goto(&b, &t);10311032/* This one should be skipped */1033mi_store(&b, out_mem64(0), mi_imm(0));10341035mi_goto_target(&b, &t);10361037submit_batch();10381039EXPECT_EQ(*(uint64_t *)(output + 0), value);1040}10411042#define MI_PREDICATE_RESULT 0x241810431044TEST_F(mi_builder_test, goto_if)1045{1046const uint64_t values[] = {10470xb453b411deadc0deull,10480x0123456789abcdefull,10490,1050};10511052mi_store(&b, out_mem64(0), mi_imm(values[0]));10531054emit_cmd(GENX(MI_PREDICATE), mip) {1055mip.LoadOperation = LOAD_LOAD;1056mip.CombineOperation = COMBINE_SET;1057mip.CompareOperation = COMPARE_FALSE;1058}10591060struct mi_goto_target t = MI_GOTO_TARGET_INIT;1061mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);10621063mi_store(&b, out_mem64(0), mi_imm(values[1]));10641065emit_cmd(GENX(MI_PREDICATE), mip) {1066mip.LoadOperation = LOAD_LOAD;1067mip.CombineOperation = COMBINE_SET;1068mip.CompareOperation = COMPARE_TRUE;1069}10701071mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);10721073/* This one should be skipped */1074mi_store(&b, out_mem64(0), mi_imm(values[2]));10751076mi_goto_target(&b, &t);10771078submit_batch();10791080EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);1081}10821083TEST_F(mi_builder_test, loop_simple)1084{1085const uint64_t loop_count = 8;10861087mi_store(&b, out_mem64(0), mi_imm(0));10881089mi_loop(&b) {1090mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));10911092mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));1093}10941095submit_batch();10961097EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);1098}10991100TEST_F(mi_builder_test, loop_break)1101{1102mi_loop(&b) {1103mi_store(&b, out_mem64(0), mi_imm(1));11041105mi_break_if(&b, mi_imm(0));11061107mi_store(&b, out_mem64(0), mi_imm(2));11081109mi_break(&b);11101111mi_store(&b, out_mem64(0), mi_imm(3));1112}11131114submit_batch();11151116EXPECT_EQ(*(uint64_t *)(output + 0), 2);1117}11181119TEST_F(mi_builder_test, loop_continue)1120{1121const uint64_t loop_count = 8;11221123mi_store(&b, out_mem64(0), mi_imm(0));1124mi_store(&b, out_mem64(8), mi_imm(0));11251126mi_loop(&b) {1127mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));11281129mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));1130mi_store(&b, out_mem64(8), mi_imm(5));11311132mi_continue(&b);11331134mi_store(&b, out_mem64(8), mi_imm(10));1135}11361137submit_batch();11381139EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);1140EXPECT_EQ(*(uint64_t *)(output + 8), 5);1141}11421143TEST_F(mi_builder_test, loop_continue_if)1144{1145const uint64_t loop_count = 8;11461147mi_store(&b, out_mem64(0), mi_imm(0));1148mi_store(&b, out_mem64(8), mi_imm(0));11491150mi_loop(&b) {1151mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));11521153mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));1154mi_store(&b, out_mem64(8), mi_imm(5));11551156emit_cmd(GENX(MI_PREDICATE), mip) {1157mip.LoadOperation = LOAD_LOAD;1158mip.CombineOperation = COMBINE_SET;1159mip.CompareOperation = COMPARE_FALSE;1160}11611162mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));11631164mi_store(&b, out_mem64(8), mi_imm(10));11651166emit_cmd(GENX(MI_PREDICATE), mip) {1167mip.LoadOperation = LOAD_LOAD;1168mip.CombineOperation = COMBINE_SET;1169mip.CompareOperation = COMPARE_TRUE;1170}11711172mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));11731174mi_store(&b, out_mem64(8), mi_imm(15));1175}11761177submit_batch();11781179EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);1180EXPECT_EQ(*(uint64_t *)(output + 8), 10);1181}1182#endif /* GFX_VERx10 >= 125 */118311841185