summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2016-11-28 18:13:02 -0800
committerJason Ekstrand <[email protected]>2016-11-29 07:44:01 -0800
commitfaf20df143a63e58aa729446f21c38ae39a438f2 (patch)
treee110100e593499830d36f4e84d9151056f0fff68 /src
parent05533ce418851b12fd0a1e940a633f9280203aab (diff)
i965/fs: Refactor handling of constant tg4 offsets
Previously, we had an OFFSET_VALUE source for logical texture instructions that was intended to mean exactly what it says, "offset". In reality, we only fully used it for tg4 offsets. We used offset_value.file == IMM to mean, "you have a constant offset, go look in instr->offset" and didn't actually use the contents of the register at all in that case except for in nir_emit_texture where we used it as a temporary before we copy it into instr->offset. This commit renames OFFSET_VALUE to TG4_OFFSET and restricts its usage to indirect tg4 offsets only. The nir_emit_texture code is refactored so that we explicitly build a header_bits value which is placed in instr->offset and the constant offset values (both for tg4 and regular texture operations) are used to construct header_bits and don't go through the offset source at all. Finally, we stop passing offset_value in to lower_sampler_logical_send_gen5 because we can't do indirect offsets until gen7 anyway. Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src')
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp19
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_nir.cpp42
3 files changed, 29 insertions, 34 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 84a51b48cc6..f22a52f7394 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1420,7 +1420,7 @@ enum tex_logical_srcs {
/** Texture sampler index */
TEX_LOGICAL_SRC_SAMPLER,
/** Texel offset for gathers */
- TEX_LOGICAL_SRC_OFFSET_VALUE,
+ TEX_LOGICAL_SRC_TG4_OFFSET,
/** REQUIRED: Number of coordinate components (as UD immediate) */
TEX_LOGICAL_SRC_COORD_COMPONENTS,
/** REQUIRED: Number of derivative components (as UD immediate) */
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 3cdf0bf39f7..c218f56684d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -730,7 +730,7 @@ fs_inst::components_read(unsigned i) const
opcode == SHADER_OPCODE_TXD_LOGICAL)
return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
/* Texture offset. */
- else if (i == TEX_LOGICAL_SRC_OFFSET_VALUE)
+ else if (i == TEX_LOGICAL_SRC_TG4_OFFSET)
return 2;
/* MCS */
else if (i == TEX_LOGICAL_SRC_MCS && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
@@ -3877,7 +3877,6 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &sample_index,
const fs_reg &surface,
const fs_reg &sampler,
- const fs_reg &offset_value,
unsigned coord_components,
unsigned grad_components)
{
@@ -3885,7 +3884,7 @@ lower_sampler_logical_send_gen5(const fs_builder &bld, fs_inst *inst, opcode op,
fs_reg msg_coords = message;
unsigned header_size = 0;
- if (offset_value.file != BAD_FILE) {
+ if (inst->offset != 0) {
/* The offsets set up by the visitor are in the m1 header, so we can't
* go headerless.
*/
@@ -3985,7 +3984,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &mcs,
const fs_reg &surface,
const fs_reg &sampler,
- const fs_reg &offset_value,
+ const fs_reg &tg4_offset,
unsigned coord_components,
unsigned grad_components)
{
@@ -3997,7 +3996,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
sources[i] = bld.vgrf(BRW_REGISTER_TYPE_F);
if (op == SHADER_OPCODE_TG4 || op == SHADER_OPCODE_TG4_OFFSET ||
- offset_value.file != BAD_FILE || inst->eot ||
+ inst->offset != 0 || inst->eot ||
op == SHADER_OPCODE_SAMPLEINFO ||
is_high_sampler(devinfo, sampler)) {
/* For general texture offsets (no txf workaround), we need a header to
@@ -4142,7 +4141,7 @@ lower_sampler_logical_send_gen7(const fs_builder &bld, fs_inst *inst, opcode op,
for (unsigned i = 0; i < 2; i++) /* offu, offv */
bld.MOV(retype(sources[length++], BRW_REGISTER_TYPE_D),
- offset(offset_value, bld, i));
+ offset(tg4_offset, bld, i));
if (coord_components == 3) /* r if present */
bld.MOV(sources[length++], offset(coordinate, bld, 2));
@@ -4194,7 +4193,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS];
const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
- const fs_reg &offset_value = inst->src[TEX_LOGICAL_SRC_OFFSET_VALUE];
+ const fs_reg &tg4_offset = inst->src[TEX_LOGICAL_SRC_TG4_OFFSET];
assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
@@ -4203,12 +4202,12 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
if (devinfo->gen >= 7) {
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
shadow_c, lod, lod2, sample_index,
- mcs, surface, sampler, offset_value,
+ mcs, surface, sampler, tg4_offset,
coord_components, grad_components);
} else if (devinfo->gen >= 5) {
lower_sampler_logical_send_gen5(bld, inst, op, coordinate,
shadow_c, lod, lod2, sample_index,
- surface, sampler, offset_value,
+ surface, sampler,
coord_components, grad_components);
} else {
lower_sampler_logical_send_gen4(bld, inst, op, coordinate,
@@ -4677,7 +4676,7 @@ get_sampler_lowered_simd_width(const struct gen_device_info *devinfo,
inst->components_read(TEX_LOGICAL_SRC_LOD2) +
inst->components_read(TEX_LOGICAL_SRC_SAMPLE_INDEX) +
(inst->opcode == SHADER_OPCODE_TG4_OFFSET_LOGICAL ?
- inst->components_read(TEX_LOGICAL_SRC_OFFSET_VALUE) : 0) +
+ inst->components_read(TEX_LOGICAL_SRC_TG4_OFFSET) : 0) +
inst->components_read(TEX_LOGICAL_SRC_MCS);
/* SIMD16 messages with more than five arguments exceed the maximum message
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c88fa77472c..baa973c31a0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4431,6 +4431,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
srcs[TEX_LOGICAL_SRC_LOD] = brw_imm_d(0);
+ uint32_t header_bits = 0;
for (unsigned i = 0; i < instr->num_srcs; i++) {
fs_reg src = get_nir_src(instr->src[i].src);
switch (instr->src[i].src_type) {
@@ -4485,11 +4486,9 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
nir_const_value *const_offset =
nir_src_as_const_value(instr->src[i].src);
if (const_offset) {
- unsigned header_bits = brw_texture_offset(const_offset->i32, 3);
- if (header_bits != 0)
- srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] = brw_imm_ud(header_bits);
+ header_bits |= brw_texture_offset(const_offset->i32, 3);
} else {
- srcs[TEX_LOGICAL_SRC_OFFSET_VALUE] =
+ srcs[TEX_LOGICAL_SRC_TG4_OFFSET] =
retype(src, BRW_REGISTER_TYPE_D);
}
break;
@@ -4607,8 +4606,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
opcode = SHADER_OPCODE_LOD_LOGICAL;
break;
case nir_texop_tg4:
- if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != BAD_FILE &&
- srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file != IMM)
+ if (srcs[TEX_LOGICAL_SRC_TG4_OFFSET].file != BAD_FILE)
opcode = SHADER_OPCODE_TG4_OFFSET_LOGICAL;
else
opcode = SHADER_OPCODE_TG4_LOGICAL;
@@ -4639,8 +4637,21 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
unreachable("unknown texture opcode");
}
+ if (instr->op == nir_texop_tg4) {
+ if (instr->component == 1 &&
+ key_tex->gather_channel_quirk_mask & (1 << texture)) {
+ /* gather4 sampler is broken for green channel on RG32F --
+ * we must ask for blue instead.
+ */
+ header_bits |= 2 << 16;
+ } else {
+ header_bits |= instr->component << 16;
+ }
+ }
+
fs_reg dst = bld.vgrf(brw_type_for_nir_type(instr->dest_type), 4);
fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));
+ inst->offset = header_bits;
const unsigned dest_size = nir_tex_instr_dest_size(instr);
if (devinfo->gen >= 9 &&
@@ -4658,23 +4669,8 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr)
if (srcs[TEX_LOGICAL_SRC_SHADOW_C].file != BAD_FILE)
inst->shadow_compare = true;
- if (srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].file == IMM)
- inst->offset = srcs[TEX_LOGICAL_SRC_OFFSET_VALUE].ud;
-
- if (instr->op == nir_texop_tg4) {
- if (instr->component == 1 &&
- key_tex->gather_channel_quirk_mask & (1 << texture)) {
- /* gather4 sampler is broken for green channel on RG32F --
- * we must ask for blue instead.
- */
- inst->offset |= 2 << 16;
- } else {
- inst->offset |= instr->component << 16;
- }
-
- if (devinfo->gen == 6)
- emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
- }
+ if (instr->op == nir_texop_tg4 && devinfo->gen == 6)
+ emit_gen6_gather_wa(key_tex->gen6_gather_wa[texture], dst);
fs_reg nir_dest[4];
for (unsigned i = 0; i < dest_size; i++)