summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFrancisco Jerez <[email protected]>2016-05-19 00:10:03 -0700
committerFrancisco Jerez <[email protected]>2016-05-27 23:29:04 -0700
commit81bc6de8c0f7faafd0f3b0aee944a14ba3ef0b64 (patch)
tree15be42f1cedc34eb105d7d8fb7d0838c5c9a005c
parent41562eb8f33558f02ff8f53b3094a0e6d54e4c49 (diff)
i965/ir: Make BROADCAST emit an unmasked single-channel move.
Alternatively we could have extended the current semantics to 32-wide mode by changing brw_broadcast() to emit multiple indexed MOV instructions in the generator copying the selected value to all destination registers, but it seemed rather silly to waste EU cycles unnecessarily copying the exact same value 32 times in the GRF. The vstride change in the Align16 path is required to avoid assertions in validate_reg() since the change causes the execution size of the MOV and SEL instructions to be equal to the source region width. Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h6
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp1
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_generator.cpp1
4 files changed, 17 insertions, 3 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index a5f3b52fa35..51eb856d7b1 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1082,6 +1082,12 @@ enum opcode {
/**
* Pick the channel from its first source register given by the index
* specified as second source. Useful for variable indexing of surfaces.
+ *
+ * Note that because the result of this instruction is by definition
+ * uniform and it can always be splatted to multiple channels using a
+ * scalar regioning mode, only the first channel of the destination region
+ * is guaranteed to be updated, which implies that BROADCAST instructions
+ * should usually be marked force_writemask_all.
*/
SHADER_OPCODE_BROADCAST,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 2a3b0b03d80..1c2ccb4e163 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3426,6 +3426,10 @@ brw_broadcast(struct brw_codegen *p,
const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
brw_inst *inst;
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
+
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
src.address_mode == BRW_ADDRESS_DIRECT);
@@ -3476,19 +3480,21 @@ brw_broadcast(struct brw_codegen *p,
*/
inst = brw_MOV(p,
brw_null_reg(),
- stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 0, 4, 1));
+ stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
/* and use predicated SEL to pick the right channel. */
inst = brw_SEL(p, dst,
- stride(suboffset(src, 4), 0, 4, 1),
- stride(src, 0, 4, 1));
+ stride(suboffset(src, 4), 4, 4, 1),
+ stride(src, 4, 4, 1));
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
}
}
+
+ brw_pop_insn_state(p);
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1d7fc6c9b40..3b0717e17d4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2023,6 +2023,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case SHADER_OPCODE_BROADCAST:
+ assert(inst->force_writemask_all);
brw_broadcast(p, dst, src[0], src[1]);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index baf442212cc..bb0254ee51e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1886,6 +1886,7 @@ generate_code(struct brw_codegen *p,
break;
case SHADER_OPCODE_BROADCAST:
+ assert(inst->force_writemask_all);
brw_broadcast(p, dst, src[0], src[1]);
break;