diff options
Diffstat (limited to 'src/gallium/drivers')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 222 |
1 files changed, 78 insertions, 144 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index a54db9e8596..1c653839aea 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -1008,32 +1008,16 @@ static void atomic_emit( static void set_tex_fetch_args(struct si_shader_context *ctx, struct lp_build_emit_data *emit_data, - unsigned target, - LLVMValueRef res_ptr, LLVMValueRef samp_ptr, - LLVMValueRef *param, unsigned count, - unsigned dmask) + struct ac_image_args *args, + unsigned target) { - struct ac_image_args args = {}; - - /* Pad to power of two vector */ - while (count < util_next_power_of_two(count)) - param[count++] = LLVMGetUndef(ctx->i32); - - if (count > 1) - args.addr = lp_build_gather_values(&ctx->gallivm, param, count); - else - args.addr = param[0]; - - args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); - args.resource = res_ptr; - args.sampler = samp_ptr; - args.dmask = dmask; - args.unorm = target == TGSI_TEXTURE_RECT || - target == TGSI_TEXTURE_SHADOWRECT; + args->dim = ac_texture_dim_from_tgsi_target(ctx->screen, target); + args->unorm = target == TGSI_TEXTURE_RECT || + target == TGSI_TEXTURE_SHADOWRECT; /* Ugly, but we seem to have no other choice right now. */ - STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args)); - memcpy(emit_data->args, &args, sizeof(args)); + STATIC_ASSERT(sizeof(*args) <= sizeof(emit_data->args)); + memcpy(emit_data->args, args, sizeof(*args)); } static LLVMValueRef fix_resinfo(struct si_shader_context *ctx, @@ -1083,7 +1067,7 @@ static void resq_fetch_args( &emit_data->args[0]); emit_data->arg_count = 1; } else { - LLVMValueRef res_ptr; + struct ac_image_args args = {}; unsigned image_target; if (inst->Memory.Texture == TGSI_TEXTURE_3D) @@ -1092,10 +1076,10 @@ static void resq_fetch_args( image_target = inst->Memory.Texture; image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture, - &res_ptr); - set_tex_fetch_args(ctx, emit_data, image_target, - res_ptr, NULL, &ctx->i32_0, 1, - 0xf); + &args.resource); + args.lod = ctx->i32_0; + args.dmask = 0xf; + set_tex_fetch_args(ctx, emit_data, &args, image_target); } } @@ -1262,22 +1246,21 @@ static void txq_fetch_args( struct si_shader_context *ctx = si_shader_context(bld_base); const struct tgsi_full_instruction *inst = emit_data->inst; unsigned target = inst->Texture.Texture; - LLVMValueRef res_ptr; - LLVMValueRef address; + struct ac_image_args args = {}; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL); if (target == TGSI_TEXTURE_BUFFER) { /* Read the size from the buffer descriptor directly. */ - emit_data->args[0] = get_buffer_size(bld_base, res_ptr); + emit_data->args[0] = get_buffer_size(bld_base, args.resource); return; } /* Textures - set the mip level. */ - address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X); + args.dmask = 0xf; - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - NULL, &address, 1, 0xf); + set_tex_fetch_args(ctx, emit_data, &args, target); } static void txq_emit(const struct lp_build_tgsi_action *action, @@ -1310,22 +1293,17 @@ static void tex_fetch_args( const struct tgsi_full_instruction *inst = emit_data->inst; unsigned opcode = inst->Instruction.Opcode; unsigned target = inst->Texture.Texture; - LLVMValueRef coords[5], derivs[6]; - LLVMValueRef address[16]; - unsigned num_coords = tgsi_util_get_texture_coord_dim(target); + struct ac_image_args args = {}; int ref_pos = tgsi_util_get_shadow_ref_src_index(target); - unsigned count = 0; unsigned chan; - unsigned num_deriv_channels = 0; bool has_offset = inst->Texture.NumOffsets > 0; - LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL; - unsigned dmask = 0xf; + LLVMValueRef fmask_ptr = NULL; - tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); + tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr); if (target == TGSI_TEXTURE_BUFFER) { emit_data->dst_type = ctx->v4f32; - emit_data->args[0] = res_ptr; + emit_data->args[0] = args.resource; emit_data->args[1] = ctx->i32_0; emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X); emit_data->arg_count = 3; @@ -1333,20 +1311,19 @@ static void tex_fetch_args( } /* Fetch and project texture coordinates */ - coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + args.coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); for (chan = 0; chan < 3; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, + args.coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); if (opcode == TGSI_OPCODE_TXP) - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, - coords[chan], - coords[3]); + args.coords[chan] = lp_build_emit_llvm_binary( + bld_base, TGSI_OPCODE_DIV, + args.coords[chan], args.coords[3]); } if (opcode == TGSI_OPCODE_TXP) - coords[3] = ctx->ac.f32_1; + args.coords[3] = ctx->ac.f32_1; /* Pack offsets. */ if (has_offset && @@ -1371,14 +1348,14 @@ static void tex_fetch_args( pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], ""); pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], ""); - address[count++] = pack; + args.offset = pack; } /* Pack LOD bias value */ if (opcode == TGSI_OPCODE_TXB) - address[count++] = coords[3]; + args.bias = args.coords[3]; if (opcode == TGSI_OPCODE_TXB2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); /* Pack depth comparison value */ if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) { @@ -1388,7 +1365,7 @@ static void tex_fetch_args( z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); } else { assert(ref_pos >= 0); - z = coords[ref_pos]; + z = args.coords[ref_pos]; } /* Section 8.23.1 (Depth Texture Comparison Mode) of the @@ -1405,7 +1382,7 @@ static void tex_fetch_args( if (ctx->screen->info.chip_class >= VI) { LLVMValueRef upgraded; LLVMValueRef clamped; - upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr, + upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, LLVMConstInt(ctx->i32, 3, false), ""); upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->i32, 29, false), ""); @@ -1414,7 +1391,7 @@ static void tex_fetch_args( z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, ""); } - address[count++] = z; + args.compare = z; } /* Pack user derivatives */ @@ -1425,7 +1402,6 @@ static void tex_fetch_args( case TGSI_TEXTURE_3D: num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 3; break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_SHADOW2D: @@ -1435,7 +1411,6 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOW2D_ARRAY: num_src_deriv_channels = 2; num_dst_deriv_channels = 2; - num_deriv_channels = 2; break; case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_SHADOWCUBE: @@ -1444,7 +1419,6 @@ static void tex_fetch_args( /* Cube derivatives will be converted to 2D. */ num_src_deriv_channels = 3; num_dst_deriv_channels = 3; - num_deriv_channels = 2; break; case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: @@ -1455,10 +1429,8 @@ static void tex_fetch_args( /* 1D textures are allocated and used as 2D on GFX9. */ if (ctx->screen->info.chip_class >= GFX9) { num_dst_deriv_channels = 2; - num_deriv_channels = 2; } else { num_dst_deriv_channels = 1; - num_deriv_channels = 1; } break; default: @@ -1467,13 +1439,13 @@ static void tex_fetch_args( for (param = 0; param < 2; param++) { for (chan = 0; chan < num_src_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = lp_build_emit_fetch(bld_base, inst, param+1, chan); /* Fill in the rest with zeros. */ for (chan = num_src_deriv_channels; chan < num_dst_deriv_channels; chan++) - derivs[param * num_dst_deriv_channels + chan] = + args.derivs[param * num_dst_deriv_channels + chan] = ctx->ac.f32_0; } } @@ -1487,28 +1459,17 @@ static void tex_fetch_args( target == TGSI_TEXTURE_CUBE_ARRAY || target == TGSI_TEXTURE_SHADOWCUBE_ARRAY, opcode == TGSI_OPCODE_LODQ, - coords, derivs); + args.coords, args.derivs); } else if (tgsi_is_array_sampler(target) && opcode != TGSI_OPCODE_TXF && opcode != TGSI_OPCODE_TXF_LZ && ctx->screen->info.chip_class <= VI) { unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2; - coords[array_coord] = + args.coords[array_coord] = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, - &coords[array_coord], 1, 0); + &args.coords[array_coord], 1, 0); } - if (opcode == TGSI_OPCODE_TXD) - for (int i = 0; i < num_deriv_channels * 2; i++) - address[count++] = derivs[i]; - - /* Pack texture coordinates */ - address[count++] = coords[0]; - if (num_coords > 1) - address[count++] = coords[1]; - if (num_coords > 2) - address[count++] = coords[2]; - /* 1D textures are allocated and used as 2D on GFX9. */ if (ctx->screen->info.chip_class >= GFX9) { LLVMValueRef filler; @@ -1522,32 +1483,31 @@ static void tex_fetch_args( if (target == TGSI_TEXTURE_1D || target == TGSI_TEXTURE_SHADOW1D) { - address[count++] = filler; + args.coords[1] = filler; } else if (target == TGSI_TEXTURE_1D_ARRAY || target == TGSI_TEXTURE_SHADOW1D_ARRAY) { - address[count] = address[count - 1]; - address[count - 1] = filler; - count++; + args.coords[2] = args.coords[1]; + args.coords[1] = filler; } } /* Pack LOD or sample index */ - if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) - address[count++] = coords[3]; + if (opcode == TGSI_OPCODE_TXL) + args.lod = args.coords[3]; else if (opcode == TGSI_OPCODE_TXL2) - address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); - - if (count > 16) { - assert(!"Cannot handle more than 16 texture address parameters"); - count = 16; + args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X); + else if (opcode == TGSI_OPCODE_TXF) { + if (target == TGSI_TEXTURE_2D_MSAA) { + /* No LOD, but move sample index into the right place. */ + args.coords[2] = args.coords[3]; + } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) { + args.lod = args.coords[3]; + } } - for (chan = 0; chan < count; chan++) - address[chan] = ac_to_integer(&ctx->ac, address[chan]); - if (target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA) { - ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, address, + ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords, target == TGSI_TEXTURE_2D_ARRAY_MSAA); } @@ -1562,7 +1522,7 @@ static void tex_fetch_args( switch (target) { case TGSI_TEXTURE_3D: - address[2] = lp_build_add(uint_bld, address[2], + args.coords[2] = lp_build_add(uint_bld, args.coords[2], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]); /* fall through */ case TGSI_TEXTURE_2D: @@ -1571,16 +1531,16 @@ static void tex_fetch_args( case TGSI_TEXTURE_SHADOWRECT: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_SHADOW2D_ARRAY: - address[1] = - lp_build_add(uint_bld, address[1], + args.coords[1] = + lp_build_add(uint_bld, args.coords[1], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]); /* fall through */ case TGSI_TEXTURE_1D: case TGSI_TEXTURE_SHADOW1D: case TGSI_TEXTURE_1D_ARRAY: case TGSI_TEXTURE_SHADOW1D_ARRAY: - address[0] = - lp_build_add(uint_bld, address[0], + args.coords[0] = + lp_build_add(uint_bld, args.coords[0], ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]); break; /* texture offsets do not apply to other texture targets */ @@ -1588,6 +1548,8 @@ static void tex_fetch_args( } } + args.dmask = 0xf; + if (opcode == TGSI_OPCODE_TG4) { unsigned gather_comp = 0; @@ -1611,11 +1573,10 @@ static void tex_fetch_args( gather_comp = CLAMP(gather_comp, 0, 3); } - dmask = 1 << gather_comp; + args.dmask = 1 << gather_comp; } - set_tex_fetch_args(ctx, emit_data, target, res_ptr, - samp_ptr, address, count, dmask); + set_tex_fetch_args(ctx, emit_data, &args, target); } /* Gather4 should follow the same rules as bilinear filtering, but the hardware @@ -1641,14 +1602,7 @@ si_lower_gather4_integer(struct si_shader_context *ctx, { LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef wa_8888 = NULL; - LLVMValueRef coord = args->addr; LLVMValueRef half_texel[2]; - /* Texture coordinates start after: - * {offset, bias, z-compare, derivatives} - * Only the offset and z-compare can occur here. - */ - unsigned coord_vgpr_index = (int)args->offset + (int)args->compare; - int c; assert(return_type == TGSI_RETURN_TYPE_SINT || return_type == TGSI_RETURN_TYPE_UINT); @@ -1691,6 +1645,7 @@ si_lower_gather4_integer(struct si_shader_context *ctx, half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5); } else { struct tgsi_full_instruction txq_inst = {}; + struct ac_image_args txq_args = {}; struct lp_build_emit_data txq_emit_data = {}; struct lp_build_if_state if_ctx; @@ -1703,13 +1658,15 @@ si_lower_gather4_integer(struct si_shader_context *ctx, txq_inst.Texture.Texture = target; txq_emit_data.inst = &txq_inst; txq_emit_data.dst_type = ctx->v4i32; - set_tex_fetch_args(ctx, &txq_emit_data, target, - args->resource, NULL, &ctx->i32_0, - 1, 0xf); + txq_args.resource = args->resource; + txq_args.sampler = args->sampler; + txq_args.lod = ctx->ac.i32_0; + txq_args.dmask = 0xf; + set_tex_fetch_args(ctx, &txq_emit_data, &txq_args, target); txq_emit(NULL, &ctx->bld_base, &txq_emit_data); /* Compute -0.5 / size. */ - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { half_texel[c] = LLVMBuildExtractElement(builder, txq_emit_data.output[0], LLVMConstInt(ctx->i32, c, 0), ""); @@ -1726,7 +1683,7 @@ si_lower_gather4_integer(struct si_shader_context *ctx, LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block }; - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 }; half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2, values, bb); @@ -1734,19 +1691,13 @@ si_lower_gather4_integer(struct si_shader_context *ctx, } } - for (c = 0; c < 2; c++) { + for (unsigned c = 0; c < 2; c++) { LLVMValueRef tmp; - LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0); - - tmp = LLVMBuildExtractElement(builder, coord, index, ""); - tmp = ac_to_float(&ctx->ac, tmp); + tmp = ac_to_float(&ctx->ac, args->coords[c]); tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], ""); - tmp = ac_to_integer(&ctx->ac, tmp); - coord = LLVMBuildInsertElement(builder, coord, tmp, index, ""); + args->coords[c] = ac_to_integer(&ctx->ac, tmp); } - args->addr = coord; - return wa_8888; } @@ -1811,8 +1762,6 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, memcpy(&args, emit_data->args, sizeof(args)); /* ugly */ args.opcode = ac_image_sample; - args.compare = tgsi_is_shadow_target(target); - args.offset = inst->Texture.NumOffsets > 0; switch (opcode) { case TGSI_OPCODE_TXF: @@ -1821,13 +1770,9 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, target == TGSI_TEXTURE_2D_MSAA || target == TGSI_TEXTURE_2D_ARRAY_MSAA ? ac_image_load : ac_image_load_mip; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_LODQ: args.opcode = ac_image_get_lod; - args.compare = false; - args.offset = false; break; case TGSI_OPCODE_TEX: case TGSI_OPCODE_TEX2: @@ -1841,14 +1786,11 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action, case TGSI_OPCODE_TXB: case TGSI_OPCODE_TXB2: assert(ctx->type == PIPE_SHADER_FRAGMENT); - args.bias = true; break; case TGSI_OPCODE_TXL: case TGSI_OPCODE_TXL2: - args.lod = true; break; case TGSI_OPCODE_TXD: - args.deriv = true; break; case TGSI_OPCODE_TG4: args.opcode = ac_image_gather4; @@ -1897,7 +1839,6 @@ static void si_llvm_emit_txqs( tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr); - /* Read the samples from the descriptor directly. */ res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, ""); samples = LLVMBuildExtractElement(ctx->ac.builder, res, @@ -1932,36 +1873,29 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action, image = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0)); - LLVMValueRef addr[4]; unsigned chan = 0; - addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16); if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D) - addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16); /* Get the current render target layer index. */ if (ctx->shader->key.mono.u.ps.fbfetch_layered) - addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); + args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11); if (ctx->shader->key.mono.u.ps.fbfetch_msaa) - addr[chan++] = si_get_sample_id(ctx); - - while (chan < 4) - addr[chan++] = LLVMGetUndef(ctx->i32); + args.coords[chan++] = si_get_sample_id(ctx); if (ctx->shader->key.mono.u.ps.fbfetch_msaa) { fmask = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0)); - ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false); + ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, false); } - addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr)); - args.opcode = ac_image_load; args.resource = image; - args.addr = addr_vec; args.dmask = 0xf; if (ctx->shader->key.mono.u.ps.fbfetch_msaa) args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? |