diff options
author | Kristian H. Kristensen <[email protected]> | 2019-10-22 16:16:35 -0700 |
---|---|---|
committer | Kristian H. Kristensen <[email protected]> | 2019-11-07 16:36:44 -0800 |
commit | fe450ef4cf672f4f66ea1966cc96bc706b864357 (patch) | |
tree | cffc04275701d2eb17900c22d8c47d35de1b4b03 /src | |
parent | 5d67da13a3f9e22bc5490e2e658f46806125fce1 (diff) |
freedreno/ir3: Add load and store intrinsics for global io
These intrinsics take an ivec2 for the 64-bit base address and an
integer offset.
Signed-off-by: Kristian H. Kristensen <[email protected]>
Acked-by: Eric Anholt <[email protected]>
Reviewed-by: Rob Clark <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler/nir/nir_intrinsics.py | 11 | ||||
-rw-r--r-- | src/freedreno/ir3/ir3_compiler_nir.c | 49 |
2 files changed, 60 insertions, 0 deletions
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 02c781e7181..9c1418f2a4e 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -800,6 +800,17 @@ store("shared_ir3", 2, [BASE, WRMASK, ALIGN_MUL, ALIGN_OFFSET]) # src[] = { offset }. load("shared_ir3", 1, [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE]) +# IR3-specific load/store global intrinsics. They take a 64-bit base address +# and a 32-bit offset. The hardware will add the base and the offset, which +# saves us from doing 64-bit math on the base address. + +# src[] = { value, address(vec2 of hi+lo uint32_t), offset }. +# const_index[] = { write_mask, access, align_mul, align_offset } +intrinsic("store_global_ir3", [0, 2, 1], indices=[WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET]) +# src[] = { address(vec2 of hi+lo uint32_t), offset }. +# const_index[] = { access, align_mul, align_offset } +intrinsic("load_global_ir3", [2, 1], dest_comp=0, indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE]) + # Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined # within a blend shader to read/write the raw value from the tile buffer, # without applying any format conversion in the process. 
If the shader needs diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c index e0bf9280975..f7af73f8227 100644 --- a/src/freedreno/ir3/ir3_compiler_nir.c +++ b/src/freedreno/ir3/ir3_compiler_nir.c @@ -1376,6 +1376,55 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr) dst[0] = ctx->primitive_id; break; + case nir_intrinsic_store_global_ir3: { + struct ir3_instruction *value, *addr, *offset; + + addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[1])[0], + ir3_get_src(ctx, &intr->src[1])[1] + }, 2); + + offset = ir3_get_src(ctx, &intr->src[2])[0]; + + value = ir3_create_collect(ctx, ir3_get_src(ctx, &intr->src[0]), + intr->num_components); + + struct ir3_instruction *stg = + ir3_STG_G(ctx->block, addr, 0, value, 0, + create_immed(ctx->block, intr->num_components), 0, offset, 0); + stg->cat6.type = TYPE_U32; + stg->cat6.iim_val = 1; + + array_insert(b, b->keeps, stg); + + stg->barrier_class = IR3_BARRIER_BUFFER_W; + stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; + break; + } + + case nir_intrinsic_load_global_ir3: { + struct ir3_instruction *addr, *offset; + + addr = ir3_create_collect(ctx, (struct ir3_instruction*[]){ + ir3_get_src(ctx, &intr->src[0])[0], + ir3_get_src(ctx, &intr->src[0])[1] + }, 2); + + offset = ir3_get_src(ctx, &intr->src[1])[0]; + + struct ir3_instruction *load = + ir3_LDG(b, addr, 0, create_immed(ctx->block, intr->num_components), + 0, offset, 0); + load->cat6.type = TYPE_U32; + load->regs[0]->wrmask = MASK(intr->num_components); + + load->barrier_class = IR3_BARRIER_BUFFER_R; + load->barrier_conflict = IR3_BARRIER_BUFFER_W; + + ir3_split_dest(b, dst, load, 0, intr->num_components); + break; + } + case nir_intrinsic_load_ubo: emit_intrinsic_load_ubo(ctx, intr, dst); break; |