diff options
Diffstat (limited to 'src/intel')
-rw-r--r-- | src/intel/common/meson.build | 20 | ||||
-rw-r--r-- | src/intel/common/tests/gen_mi_builder_test.cpp | 701 |
2 files changed, 721 insertions, 0 deletions
diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build
index 19ee2f64875..bf8d6feab2a 100644
--- a/src/intel/common/meson.build
+++ b/src/intel/common/meson.build
@@ -43,3 +43,23 @@ libintel_common = static_library(
   link_with : [libisl],
   dependencies : [dep_expat, dep_libdrm, dep_thread, idep_genxml],
 )
+
+install_intel_gpu_tests = get_option('install-intel-gpu-tests')
+
+if install_intel_gpu_tests
+  foreach g : [['70', 'gen7'], ['75', 'hsw'], ['80', 'gen8'],
+               ['90', 'gen9'], ['110', 'gen11']]
+    executable(
+      'intel_@0@_mi_builder_test'.format(g[1]),
+      files('tests/gen_mi_builder_test.cpp'),
+      cpp_args : [
+        cpp_vis_args, cpp_msvc_compat_args,
+        '-DGEN_VERSIONx10=@0@'.format(g[0])
+      ],
+      include_directories : [inc_common, inc_intel],
+      link_with : [libintel_dev, libmesa_util],
+      dependencies : [dep_libdrm, dep_thread, idep_gtest, idep_genxml],
+      install : install_intel_gpu_tests,
+    )
+  endforeach
+endif
diff --git a/src/intel/common/tests/gen_mi_builder_test.cpp b/src/intel/common/tests/gen_mi_builder_test.cpp
new file mode 100644
index 00000000000..e68d35c9fb9
--- /dev/null
+++ b/src/intel/common/tests/gen_mi_builder_test.cpp
@@ -0,0 +1,701 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <xf86drm.h>
+
+#include <gtest/gtest.h>
+
+#include "dev/gen_device_info.h"
+#include "drm-uapi/i915_drm.h"
+#include "genxml/gen_macros.h"
+#include "util/macros.h"
+
+class gen_mi_builder_test;
+
+/* A location inside a buffer object: the GEM handle of the buffer plus a byte
+ * offset into it.  Registered below as __gen_address_type.
+ */
+struct address {
+   uint32_t gem_handle;
+   uint32_t offset;
+};
+
+#define __gen_address_type struct address
+#define __gen_user_data ::gen_mi_builder_test
+
+uint64_t __gen_combine_address(gen_mi_builder_test *test, void *location,
+                               struct address addr, uint32_t delta);
+void * __gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords);
+
+/* Returns addr advanced by offset bytes within the same buffer object. */
+struct address
+__gen_address_offset(address addr, uint64_t offset)
+{
+   addr.offset += offset;
+   return addr;
+}
+
+/* Register the imm_reg and mem_reg tests store to and read back from. */
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
+#else
+#define RSVD_TEMP_REG 0x2430 /* GEN7_3DPRIM_START_VERTEX */
+#endif
+#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 15
+/* Byte offsets of the input and output regions inside the data buffer. */
+#define INPUT_DATA_OFFSET 0
+#define OUTPUT_DATA_OFFSET 2048
+
+#include "genxml/genX_pack.h"
+#include "gen_mi_builder.h"
+
+#include <vector>
+
+/* Test fixture: owns the DRM file descriptor, a batch buffer that commands
+ * are emitted into and a data buffer whose two regions (input and output)
+ * the tests write to and read back from through the CPU mapping.
+ */
+class gen_mi_builder_test : public ::testing::Test {
+public:
+   gen_mi_builder_test();
+   ~gen_mi_builder_test();
+
+   void SetUp();
+
+   void *emit_dwords(int num_dwords);
+   void submit_batch();
+
+   /* Address of a byte offset inside the input region of the data buffer. */
+   inline address in_addr(uint32_t offset)
+   {
+      address addr;
+      addr.gem_handle = data_bo_handle;
+      addr.offset = INPUT_DATA_OFFSET + offset;
+      return addr;
+   }
+
+   /* Address of a byte offset inside the output region of the data buffer. */
+   inline address out_addr(uint32_t offset)
+   {
+      address addr;
+      addr.gem_handle = data_bo_handle;
+      addr.offset = OUTPUT_DATA_OFFSET + offset;
+      return addr;
+   }
+
+   inline gen_mi_value in_mem64(uint32_t offset)
+   {
+      return gen_mi_mem64(in_addr(offset));
+   }
+
+   inline gen_mi_value in_mem32(uint32_t offset)
+   {
+      return gen_mi_mem32(in_addr(offset));
+   }
+
+   inline gen_mi_value out_mem64(uint32_t offset)
+   {
+      return gen_mi_mem64(out_addr(offset));
+   }
+
+   inline gen_mi_value out_mem32(uint32_t offset)
+   {
+      return gen_mi_mem32(out_addr(offset));
+   }
+
+   int fd;
+   gen_device_info devinfo;
+
+   uint32_t batch_bo_handle;
+   uint32_t batch_offset;
+   void *batch_map;
+
+   std::vector<drm_i915_gem_relocation_entry> relocs;
+
+   uint32_t data_bo_handle;
+   void *data_map;
+   char *input;
+   char *output;
+   uint64_t canary;
+
+   gen_mi_builder b;
+};
+
+// 1 MB of batch should be enough for anyone, right?
+#define BATCH_BO_SIZE (256 * 4096)
+#define DATA_BO_SIZE 4096
+
+gen_mi_builder_test::gen_mi_builder_test() :
+   fd(-1),
+   batch_bo_handle(0), batch_offset(0), batch_map(NULL),
+   data_bo_handle(0), data_map(NULL), input(NULL), output(NULL),
+   canary(0)
+{ }
+
+/* Releases whatever SetUp() managed to acquire.  SetUp() can bail out early
+ * on a failed assertion, so every resource is checked before it is released.
+ */
+gen_mi_builder_test::~gen_mi_builder_test()
+{
+   if (batch_map != NULL)
+      munmap(batch_map, BATCH_BO_SIZE);
+   if (data_map != NULL)
+      munmap(data_map, DATA_BO_SIZE);
+   if (fd >= 0)
+      close(fd);
+}
+
+/* Opens the first Intel render node whose generation matches the one this
+ * file was compiled for, creates and maps the batch and data buffers, fills
+ * the data buffer and the canary with the same byte and sets up the builder.
+ */
+void
+gen_mi_builder_test::SetUp()
+{
+   drmDevicePtr devices[8];
+   int max_devices = drmGetDevices2(0, devices, 8);
+   ASSERT_GE(max_devices, 0) << strerror(-max_devices);
+
+   /* Frees the device list on every way out of this function, including the
+    * early returns that the ASSERT_* macros perform.
+    */
+   struct device_list_guard {
+      drmDevicePtr *list;
+      int count;
+      ~device_list_guard() { drmFreeDevices(list, count); }
+   } guard = { devices, max_devices };
+
+   int i;
+   for (i = 0; i < max_devices; i++) {
+      if ((devices[i]->available_nodes & (1 << DRM_NODE_RENDER)) &&
+          devices[i]->bustype == DRM_BUS_PCI &&
+          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
+         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
+         if (fd < 0)
+            continue;
+
+         /* We don't really need to do this when running on hardware because
+          * we can just pull it from the drmDevice.  However, without doing
+          * this, intel_dump_gpu gets a bit of heartburn and we can't use the
+          * --device option with it.
+          */
+         int device_id;
+         drm_i915_getparam getparam = drm_i915_getparam();
+         getparam.param = I915_PARAM_CHIPSET_ID;
+         getparam.value = &device_id;
+         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
+                            (void *)&getparam), 0) << strerror(errno);
+
+         ASSERT_TRUE(gen_get_device_info(device_id, &devinfo));
+         if (devinfo.gen != GEN_GEN || devinfo.is_haswell != GEN_IS_HASWELL) {
+            close(fd);
+            fd = -1;
+            continue;
+         }
+
+         /* Found a device! */
+         break;
+      }
+   }
+   ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
+
+   // Create the batch buffer
+   drm_i915_gem_create gem_create = drm_i915_gem_create();
+   gem_create.size = BATCH_BO_SIZE;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
+                      (void *)&gem_create), 0) << strerror(errno);
+   batch_bo_handle = gem_create.handle;
+
+   drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
+   gem_caching.handle = batch_bo_handle;
+   gem_caching.caching = I915_CACHING_CACHED;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
+                      (void *)&gem_caching), 0) << strerror(errno);
+
+   drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
+   gem_mmap.handle = batch_bo_handle;
+   gem_mmap.offset = 0;
+   gem_mmap.size = BATCH_BO_SIZE;
+   gem_mmap.flags = 0;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
+                      (void *)&gem_mmap), 0) << strerror(errno);
+   batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
+
+   // Start the batch at zero
+   batch_offset = 0;
+
+   // Create the data buffer
+   gem_create = drm_i915_gem_create();
+   gem_create.size = DATA_BO_SIZE;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
+                      (void *)&gem_create), 0) << strerror(errno);
+   data_bo_handle = gem_create.handle;
+
+   gem_caching = drm_i915_gem_caching();
+   gem_caching.handle = data_bo_handle;
+   gem_caching.caching = I915_CACHING_CACHED;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
+                      (void *)&gem_caching), 0) << strerror(errno);
+
+   gem_mmap = drm_i915_gem_mmap();
+   gem_mmap.handle = data_bo_handle;
+   gem_mmap.offset = 0;
+   gem_mmap.size = DATA_BO_SIZE;
+   gem_mmap.flags = 0;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
+                      (void *)&gem_mmap), 0) << strerror(errno);
+   data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
+   input = (char *)data_map + INPUT_DATA_OFFSET;
+   output = (char *)data_map + OUTPUT_DATA_OFFSET;
+
+   // Fill the test data with garbage
+   memset(data_map, 139, DATA_BO_SIZE);
+   memset(&canary, 139, sizeof(canary));
+
+   gen_mi_builder_init(&b, this);
+}
+
+/* Reserves num_dwords dwords at the current end of the batch and returns a
+ * pointer to them in the CPU mapping of the batch buffer.
+ */
+void *
+gen_mi_builder_test::emit_dwords(int num_dwords)
+{
+   void *ptr = (void *)((char *)batch_map + batch_offset);
+   batch_offset += num_dwords * 4;
+   assert(batch_offset < BATCH_BO_SIZE);
+   return ptr;
+}
+
+/* Terminates the batch, submits it together with the data buffer on the
+ * render ring and blocks until the wait on the batch buffer returns.
+ */
+void
+gen_mi_builder_test::submit_batch()
+{
+   gen_mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
+
+   // Round batch up to an even number of dwords.
+   if (batch_offset & 4)
+      gen_mi_builder_emit(&b, GENX(MI_NOOP), noop);
+
+   drm_i915_gem_exec_object2 objects[2];
+   memset(objects, 0, sizeof(objects));
+
+   objects[0].handle = data_bo_handle;
+   objects[0].relocation_count = 0;
+   objects[0].relocs_ptr = 0;
+   objects[0].flags = EXEC_OBJECT_WRITE;
+   objects[0].offset = -1;
+   if (GEN_GEN >= 8)
+      objects[0].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+   objects[1].handle = batch_bo_handle;
+   objects[1].relocation_count = relocs.size();
+   // Indexing an empty vector is undefined, so only take the address of the
+   // first entry when there is one.
+   objects[1].relocs_ptr = relocs.empty() ? 0 : (uintptr_t)(void *)&relocs[0];
+   objects[1].flags = 0;
+   objects[1].offset = -1;
+   if (GEN_GEN >= 8)
+      objects[1].flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
+   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
+   execbuf.buffer_count = 2;
+   execbuf.batch_start_offset = 0;
+   execbuf.batch_len = batch_offset;
+   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
+
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
+                      (void *)&execbuf), 0) << strerror(errno);
+
+   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
+   gem_wait.bo_handle = batch_bo_handle;
+   gem_wait.timeout_ns = INT64_MAX;
+   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
+                      (void *)&gem_wait), 0) << strerror(errno);
+}
+
+/* Records a relocation for the address written at location in the batch and
+ * returns the value to write there for now (the offset within the target).
+ * Exec objects are referenced by index (I915_EXEC_HANDLE_LUT): submit_batch()
+ * puts the data buffer at index 0 and the batch buffer at index 1.
+ */
+uint64_t
+__gen_combine_address(gen_mi_builder_test *test, void *location,
+                      address addr, uint32_t delta)
+{
+   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
+   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
+   reloc.delta = addr.offset + delta;
+   reloc.offset = (char *)location - (char *)test->batch_map;
+   reloc.presumed_offset = -1;
+   test->relocs.push_back(reloc);
+
+   return reloc.delta;
+}
+
+/* Forwards a request for num_dwords dwords to the fixture's batch. */
+void *
+__gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords)
+{
+   return test->emit_dwords(num_dwords);
+}
+
+#include "genxml/genX_pack.h"
+#include "gen_mi_builder.h"
+
+TEST_F(gen_mi_builder_test, imm_mem)
+{
+   const uint64_t value = 0x0123456789abcdef;
+
+   gen_mi_store(&b, out_mem64(0), gen_mi_imm(value));
+   gen_mi_store(&b, out_mem32(8), gen_mi_imm(value));
+
+   submit_batch();
+
+   // 64 -> 64
+   EXPECT_EQ(*(uint64_t *)(output + 0), value);
+
+   // 64 -> 32
+   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
+   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
+}
+
+TEST_F(gen_mi_builder_test, mem_mem)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   gen_mi_store(&b, out_mem64(0), in_mem64(0));
+   gen_mi_store(&b, out_mem32(8), in_mem64(0));
+   gen_mi_store(&b, out_mem32(16), in_mem32(0));
+   gen_mi_store(&b, out_mem64(24), in_mem32(0));
+
+   submit_batch();
+
+   // 64 -> 64
+   EXPECT_EQ(*(uint64_t *)(output + 0), value);
+
+   // 64 -> 32
+   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
+   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
+
+   // 32 -> 32
+   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
+   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);
+
+   // 32 -> 64
+   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
+}
+
+TEST_F(gen_mi_builder_test, imm_reg)
+{
+   const uint64_t value = 0x0123456789abcdef;
+
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(value));
+   gen_mi_store(&b, out_mem64(0), gen_mi_reg64(RSVD_TEMP_REG));
+
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
+   gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), gen_mi_imm(value));
+   gen_mi_store(&b, out_mem64(8), gen_mi_reg64(RSVD_TEMP_REG));
+
+   submit_batch();
+
+   // 64 -> 64
+   EXPECT_EQ(*(uint64_t *)(output + 0), value);
+
+   // 64 -> 32
+   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
+   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
+}
+
+TEST_F(gen_mi_builder_test, mem_reg)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), in_mem64(0));
+   gen_mi_store(&b, out_mem64(0), gen_mi_reg64(RSVD_TEMP_REG));
+
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
+   gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), in_mem64(0));
+   gen_mi_store(&b, out_mem64(8), gen_mi_reg64(RSVD_TEMP_REG));
+
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
+   gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), in_mem32(0));
+   gen_mi_store(&b, out_mem64(16), gen_mi_reg64(RSVD_TEMP_REG));
+
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
+   gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), in_mem32(0));
+   gen_mi_store(&b, out_mem64(24), gen_mi_reg64(RSVD_TEMP_REG));
+
+   submit_batch();
+
+   // 64 -> 64
+   EXPECT_EQ(*(uint64_t *)(output + 0), value);
+
+   // 64 -> 32
+   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
+   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
+
+   // 32 -> 32
+   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
+   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);
+
+   // 32 -> 64
+   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
+}
+
+/* Start of MI_MATH section */
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+
+/* Test adding of immediates of all kinds including
+ *
+ *  - All zeroes
+ *  - All ones
+ *  - inverted constants
+ */
+TEST_F(gen_mi_builder_test, add_imm)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   const uint64_t add = 0xdeadbeefac0ffee2;
+   memcpy(input, &value, sizeof(value));
+
+   gen_mi_store(&b, out_mem64(0),
+                gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(0)));
+   gen_mi_store(&b, out_mem64(8),
+                gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(-1)));
+   gen_mi_store(&b, out_mem64(16),
+                gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(0))));
+   gen_mi_store(&b, out_mem64(24),
+                gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(-1))));
+   gen_mi_store(&b, out_mem64(32),
+                gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(add)));
+   gen_mi_store(&b, out_mem64(40),
+                gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(add))));
+   gen_mi_store(&b, out_mem64(48),
+                gen_mi_iadd(&b, gen_mi_imm(0), in_mem64(0)));
+   gen_mi_store(&b, out_mem64(56),
+                gen_mi_iadd(&b, gen_mi_imm(-1), in_mem64(0)));
+   gen_mi_store(&b, out_mem64(64),
+                gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(0)), in_mem64(0)));
+   gen_mi_store(&b, out_mem64(72),
+                gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(-1)), in_mem64(0)));
+   gen_mi_store(&b, out_mem64(80),
+                gen_mi_iadd(&b, gen_mi_imm(add), in_mem64(0)));
+   gen_mi_store(&b, out_mem64(88),
+                gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(add)), in_mem64(0)));
+
+   // And some add_imm just for good measure
+   gen_mi_store(&b, out_mem64(96), gen_mi_iadd_imm(&b, in_mem64(0), 0));
+   gen_mi_store(&b, out_mem64(104), gen_mi_iadd_imm(&b, in_mem64(0), add));
+
+   submit_batch();
+
+   EXPECT_EQ(*(uint64_t *)(output + 0), value);
+   EXPECT_EQ(*(uint64_t *)(output + 8), value - 1);
+   EXPECT_EQ(*(uint64_t *)(output + 16), value - 1);
+   EXPECT_EQ(*(uint64_t *)(output + 24), value);
+   EXPECT_EQ(*(uint64_t *)(output + 32), value + add);
+   EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add);
+   EXPECT_EQ(*(uint64_t *)(output + 48), value);
+   EXPECT_EQ(*(uint64_t *)(output + 56), value - 1);
+   EXPECT_EQ(*(uint64_t *)(output + 64), value - 1);
+   EXPECT_EQ(*(uint64_t *)(output + 72), value);
+   EXPECT_EQ(*(uint64_t *)(output + 80), value + add);
+   EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add);
+   EXPECT_EQ(*(uint64_t *)(output + 96), value);
+   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
+}
+
+/* Exercises gen_mi_ult() and gen_mi_uge() on every ordered pair of values.
+ * A true comparison is expected to read back as ~0u, a false one as 0u.
+ */
+TEST_F(gen_mi_builder_test, ilt_uge)
+{
+   uint64_t values[8] = {
+      0x0123456789abcdef,
+      0xdeadbeefac0ffee2,
+      (uint64_t)-1,
+      1,
+      0,
+      1049571,
+      (uint64_t)-240058,
+      20204184,
+   };
+   memcpy(input, values, sizeof(values));
+
+   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
+         gen_mi_store(&b, out_mem32(i * 64 + j * 8 + 0),
+                      gen_mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
+         gen_mi_store(&b, out_mem32(i * 64 + j * 8 + 4),
+                      gen_mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
+      }
+   }
+
+   submit_batch();
+
+   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
+         uint32_t *out_u32 = (uint32_t *)(output + i * 64 + j * 8);
+         EXPECT_EQ(out_u32[0], values[i] < values[j] ? ~0u : 0u);
+         EXPECT_EQ(out_u32[1], values[i] >= values[j] ? ~0u : 0u);
+      }
+   }
+}
+
+TEST_F(gen_mi_builder_test, iand)
+{
+   const uint64_t values[2] = {
+      0x0123456789abcdef,
+      0xdeadbeefac0ffee2,
+   };
+   memcpy(input, values, sizeof(values));
+
+   gen_mi_store(&b, out_mem64(0), gen_mi_iand(&b, in_mem64(0), in_mem64(8)));
+
+   submit_batch();
+
+   EXPECT_EQ(*(uint64_t *)output, values[0] & values[1]);
+}
+
+TEST_F(gen_mi_builder_test, imul_imm)
+{
+   uint64_t lhs[2] = {
+      0x0123456789abcdef,
+      0xdeadbeefac0ffee2,
+   };
+   memcpy(input, lhs, sizeof(lhs));
+
+   /* Some random 32-bit unsigned integers.  The first four have been
+    * hand-chosen just to ensure some good low integers; the rest were
+    * generated with a python script.
+    */
+   uint32_t rhs[20] = {
+      1,       2,       3,       5,
+      10800,   193,     64,      40,
+      3796,    256,     88,      473,
+      1421,    706,     175,     850,
+      39,      38985,   1941,    17,
+   };
+
+   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
+         gen_mi_store(&b, out_mem64(i * 160 + j * 8),
+                      gen_mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
+      }
+   }
+
+   submit_batch();
+
+   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
+         EXPECT_EQ(*(uint64_t *)(output + i * 160 + j * 8), lhs[i] * rhs[j]);
+      }
+   }
+}
+
+TEST_F(gen_mi_builder_test, ishl_imm)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   const unsigned max_shift = 64;
+
+   for (unsigned i = 0; i <= max_shift; i++)
+      gen_mi_store(&b, out_mem64(i * 8), gen_mi_ishl_imm(&b, in_mem64(0), i));
+
+   submit_batch();
+
+   for (unsigned i = 0; i <= max_shift; i++) {
+      // A shift by the full width is undefined in C++, so that case is
+      // compared against a literal zero instead of value << i.
+      if (i >= 64) {
+         EXPECT_EQ(*(uint64_t *)(output + i * 8), (uint64_t)0);
+      } else {
+         EXPECT_EQ(*(uint64_t *)(output + i * 8), value << i);
+      }
+   }
+}
+
+TEST_F(gen_mi_builder_test, ushr32_imm)
+{
+   const uint64_t value = 0x0123456789abcdef;
+   memcpy(input, &value, sizeof(value));
+
+   const unsigned max_shift = 31;
+
+   for (unsigned i = 0; i <= max_shift; i++)
+      gen_mi_store(&b, out_mem64(i * 8), gen_mi_ushr32_imm(&b, in_mem64(0), i));
+
+   submit_batch();
+
+   for (unsigned i = 0; i <= max_shift; i++)
+      EXPECT_EQ(*(uint64_t *)(output + i * 8), (value >> i) & UINT32_MAX);
+}
+
+TEST_F(gen_mi_builder_test, udiv32_imm)
+{
+   /* Some random 32-bit unsigned integers.  The first four have been
+    * hand-chosen just to ensure some good low integers; the rest were
+    * generated with a python script.
+    */
+   uint32_t values[20] = {
+      1,       2,       3,       5,
+      10800,   193,     64,      40,
+      3796,    256,     88,      473,
+      1421,    706,     175,     850,
+      39,      38985,   1941,    17,
+   };
+   memcpy(input, values, sizeof(values));
+
+   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
+         gen_mi_store(&b, out_mem32(i * 80 + j * 4),
+                      gen_mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
+      }
+   }
+
+   submit_batch();
+
+   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
+      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
+         EXPECT_EQ(*(uint32_t *)(output + i * 80 + j * 4),
+                   values[i] / values[j]);
+      }
+   }
+}
+
+#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */