summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Forbes <[email protected]>2013-08-08 06:31:33 +1200
committerChris Forbes <[email protected]>2013-08-16 07:24:38 +1200
commita9be50f77675a70a44d231fc1f7fa85f875c5153 (patch)
tree195a26134a44f6149742331659a7c180fc02638a
parent9e2c1e28a14bb7c5ec49d6e7638b07a9e03ddca9 (diff)
i965: add new VS_OPCODE_UNPACK_FLAGS_SIMD4X2
Splits the bottom 8 bits of f0.0 for further wrangling in a SIMD4x2 program. The 4 bits corresponding to the channels in each program flow are copied to the LSBs of dst.x visible to each flow. This is useful for working with clipping flags in the VS.

V3:
- Fixup immediate types
- Teach scheduler about the hidden dep on flags

Signed-off-by: Chris Forbes <[email protected]>
V2: Reviewed-by: Paul Berry <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h4
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp23
4 files changed, 29 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index d8b3b1727cf..2ab0a2b00eb 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -798,6 +798,7 @@ enum opcode {
VS_OPCODE_SCRATCH_WRITE,
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
};
#define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 9a2e8bebfd0..6c7e827e895 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -494,6 +494,8 @@ brw_instruction_name(enum opcode op)
return "pull_constant_load";
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
return "pull_constant_load_gen7";
+ case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
+ return "unpack_flags_simd4x2";
default:
/* Yes, this leaks. It's in debug code, it should never occur, and if
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index ee14cd82600..8986648793a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -188,7 +188,7 @@ public:
bool depends_on_flags()
{
- return predicate;
+ return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2;
}
};
@@ -592,6 +592,8 @@ private:
struct brw_reg dst,
struct brw_reg surf_index,
struct brw_reg offset);
+ void generate_unpack_flags(vec4_instruction *inst,
+ struct brw_reg dst);
struct brw_context *brw;
struct gl_context *ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index c82af0e79a3..6ebc318a13d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -440,6 +440,25 @@ vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
}
void
+vec4_generator::generate_unpack_flags(vec4_instruction *inst,
+ struct brw_reg dst)
+{ /* Emit the code for VS_OPCODE_UNPACK_FLAGS_SIMD4X2: the low 8 bits of
+   * flag register 0.0 are split into two 4-bit groups, and each group is
+   * written to the low bits of a different element of dst (elements 0
+   * and 4). Per the commit description, each group holds the channel
+   * flags of one SIMD4x2 program flow. `inst` is not referenced here. */
+ brw_push_insn_state(p); /* paired with brw_pop_insn_state() at the end */
+ brw_set_mask_control(p, BRW_MASK_DISABLE); /* NOTE(review): presumably so the writes are not gated by the execution mask - confirm */
+ brw_set_access_mode(p, BRW_ALIGN_1); /* NOTE(review): presumably needed for the scalar suboffset addressing below - confirm */
+
+ struct brw_reg flags = brw_flag_reg(0, 0); /* flag register 0, subregister 0 */
+ struct brw_reg dst_0 = suboffset(vec1(dst), 0); /* single-element view of dst at element 0 */
+ struct brw_reg dst_4 = suboffset(vec1(dst), 4); /* single-element view of dst at element 4 */
+
+ brw_AND(p, dst_0, flags, brw_imm_ud(0x0f)); /* dst_0 = flags & 0x0f: bits 0-3 */
+ brw_AND(p, dst_4, flags, brw_imm_ud(0xf0)); /* dst_4 = flags & 0xf0: bits 4-7, still in place */
+ brw_SHR(p, dst_4, dst_4, brw_imm_ud(4)); /* shift bits 4-7 down into the low 4 bits */
+
+ brw_pop_insn_state(p);
+}
+
+void
vec4_generator::generate_scratch_read(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg index)
@@ -851,6 +870,10 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
break;
+ case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
+ generate_unpack_flags(inst, dst);
+ break;
+
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",