summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Turner <[email protected]>2013-04-17 18:57:58 -0700
committerMatt Turner <[email protected]>2013-05-06 10:17:14 -0700
commitacd2bccd852f1e4edbac2e57dd47139908e79b5d (patch)
tree6a4973003f0b4ff64a2fea2bd8859907664565e1
parent1f0f26d60c148e360908af34130c4e00dba8f3df (diff)
i965/vs: Add support for bit instructions.
v2: Rebase on LRP addition. Use fix_3src_operand() when emitting BFE and BFI2. Add BFE and BFI2 to is_3src_inst check in brw_vec4_copy_propagation.cpp. Subtract result of FBH from 31 (unless an error) to convert MSB counts to LSB counts Reviewed-by: Chris Forbes <[email protected]>
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h7
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp5
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp29
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp66
4 files changed, 106 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 06b0f6a33e1..7614ac58652 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -375,6 +375,13 @@ public:
vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
+ vec4_instruction *BFREV(dst_reg dst, src_reg value);
+ vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value);
+ vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset);
+ vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base);
+ vec4_instruction *FBH(dst_reg dst, src_reg value);
+ vec4_instruction *FBL(dst_reg dst, src_reg value);
+ vec4_instruction *CBIT(dst_reg dst, src_reg value);
int implied_mrf_writes(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index f2c6cd60911..39eef4b0d65 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -215,7 +215,10 @@ vec4_visitor::try_copy_propagation(struct intel_context *intel,
if (has_source_modifiers && !can_do_source_mods(inst))
return false;
- if (inst->opcode == BRW_OPCODE_LRP && value.file == UNIFORM)
+ bool is_3src_inst = (inst->opcode == BRW_OPCODE_LRP ||
+ inst->opcode == BRW_OPCODE_BFE ||
+ inst->opcode == BRW_OPCODE_BFI2);
+ if (is_3src_inst && value.file == UNIFORM)
return false;
/* We can't copy-propagate a UD negation into a condmod
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index 96b4965045b..91101f29b0d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -842,6 +842,35 @@ vec4_generator::generate_code(exec_list *instructions)
brw_LRP(p, dst, src[0], src[1], src[2]);
break;
+ case BRW_OPCODE_BFREV:
+ /* BFREV only supports UD type for src and dst. */
+ brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(src[0], BRW_REGISTER_TYPE_UD));
+ break;
+ case BRW_OPCODE_FBH:
+ /* FBH only supports UD type for dst. */
+ brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ break;
+ case BRW_OPCODE_FBL:
+ /* FBL only supports UD type for dst. */
+ brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ break;
+ case BRW_OPCODE_CBIT:
+ /* CBIT only supports UD type for dst. */
+ brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
+ break;
+
+ case BRW_OPCODE_BFE:
+ brw_BFE(p, dst, src[0], src[1], src[2]);
+ break;
+
+ case BRW_OPCODE_BFI1:
+ brw_BFI1(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_BFI2:
+ brw_BFI2(p, dst, src[0], src[1], src[2]);
+ break;
+
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3b0687f615a..cda425efde8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -136,6 +136,13 @@ ALU2(SHL)
ALU2(SHR)
ALU2(ASR)
ALU3(LRP)
+ALU1(BFREV)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(CBIT)
/** Gen4 predicated IF. */
vec4_instruction *
@@ -1382,6 +1389,39 @@ vec4_visitor::visit(ir_expression *ir)
assert(!"derivatives not valid in vertex shader");
break;
+ case ir_unop_bitfield_reverse:
+ emit(BFREV(result_dst, op[0]));
+ break;
+ case ir_unop_bit_count:
+ emit(CBIT(result_dst, op[0]));
+ break;
+ case ir_unop_find_msb: {
+ src_reg temp = src_reg(this, glsl_type::uint_type);
+
+ inst = emit(FBH(dst_reg(temp), op[0]));
+ inst->dst.writemask = WRITEMASK_XYZW;
+
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+ * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+ * subtract the result from 31 to convert the MSB count into an LSB count.
+ */
+
+ /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
+ temp.swizzle = BRW_SWIZZLE_NOOP;
+ emit(MOV(result_dst, temp));
+
+ src_reg src_tmp = src_reg(result_dst);
+ emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
+
+ src_tmp.negate = true;
+ inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
+ case ir_unop_find_lsb:
+ emit(FBL(result_dst, op[0]));
+ break;
+
case ir_unop_noise:
assert(!"not reached: should be handled by lower_noise");
break;
@@ -1582,6 +1622,10 @@ vec4_visitor::visit(ir_expression *ir)
inst = emit(SHR(result_dst, op[0], op[1]));
break;
+ case ir_binop_bfm:
+ emit(BFI1(result_dst, op[0], op[1]));
+ break;
+
case ir_binop_ubo_load: {
ir_constant *uniform_block = ir->operands[0]->as_constant();
ir_constant *const_offset_ir = ir->operands[1]->as_constant();
@@ -1637,6 +1681,28 @@ vec4_visitor::visit(ir_expression *ir)
emit(LRP(result_dst, op[2], op[1], op[0]));
break;
+ case ir_triop_bfi:
+ op[0] = fix_3src_operand(op[0]);
+ op[1] = fix_3src_operand(op[1]);
+ op[2] = fix_3src_operand(op[2]);
+ emit(BFI2(result_dst, op[0], op[1], op[2]));
+ break;
+
+ case ir_triop_bitfield_extract:
+ op[0] = fix_3src_operand(op[0]);
+ op[1] = fix_3src_operand(op[1]);
+ op[2] = fix_3src_operand(op[2]);
+ /* Note that the instruction's argument order is reversed from GLSL
+ * and the IR.
+ */
+ emit(BFE(result_dst, op[2], op[1], op[0]));
+ break;
+
+ case ir_quadop_bitfield_insert:
+ assert(!"not reached: should be handled by "
+ "bitfield_insert_to_bfm_bfi\n");
+ break;
+
case ir_quadop_vector:
assert(!"not reached: should be handled by lower_quadop_vector");
break;