summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Francisco Jerez <[email protected]> 2016-05-17 23:18:38 -0700
committer: Francisco Jerez <[email protected]> 2016-05-27 23:19:21 -0700
commit: d8a3294ac21741c3a78eef72b832902e15fbd948 (patch)
tree: 3db7dc5409867194d29a0dc864573e67c9569f8c
parent: 0bc5ad8d1997fe33dd43bb476c67163039f065ff (diff)
i965/fs: Hide varying pull constant load message setup behind logical opcode.
This will allow the SIMD lowering pass to split 32-wide varying pull constant loads (not natively supported by the hardware) into 16-wide instructions.

Reviewed-by: Jason Ekstrand <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_defines.h | 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp | 50
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.h | 6
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 9
-rw-r--r-- src/mesa/drivers/dri/i965/brw_shader.cpp | 2
6 files changed, 39 insertions, 31 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 31b33367727..432a1aec0df 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1116,6 +1116,7 @@ enum opcode {
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+ FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
FS_OPCODE_GET_BUFFER_SIZE,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 71df1e57fa3..336806b9e51 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -188,32 +188,16 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
scale = 2;
}
- enum opcode op;
- if (devinfo->gen >= 7)
- op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
- else
- op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
-
/* The pull load message will load a vec4 (16 bytes). If we are loading
* a double this means we are only loading 2 elements worth of data.
* We also want to use a 32-bit data type for the dst of the load operation
* so other parts of the driver don't get confused about the size of the
* result.
*/
- int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
- fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written),
- BRW_REGISTER_TYPE_F);
- fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
- inst->regs_written = regs_written;
-
- if (devinfo->gen < 7) {
- inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
- inst->header_size = 1;
- if (devinfo->gen == 4)
- inst->mlen = 3;
- else
- inst->mlen = 1 + bld.dispatch_width() / 8;
- }
+ fs_reg vec4_result = bld.vgrf(BRW_REGISTER_TYPE_F, 4 * scale);
+ fs_inst *inst = bld.emit(FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
+ vec4_result, surf_index, vec4_offset);
+ inst->regs_written = 4 * (bld.dispatch_width() / 8) * scale;
if (type_sz(dst.type) == 8) {
assert(scale == 1);
@@ -4439,6 +4423,28 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
delete[] components;
}
+static void
+lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
+{ /* Rewrites the logical varying pull constant load `inst` in place into the opcode matching devinfo->gen. */
+ const brw_device_info *devinfo = bld.shader->devinfo;
+
+ if (devinfo->gen >= 7) {
+ inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; /* Only the opcode changes; sources are left untouched. */
+
+ } else {
+ const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen),
+ BRW_REGISTER_TYPE_UD); /* Message payload begins at the first pull-load MRF for this generation. */
+
+ bld.MOV(byte_offset(payload, REG_SIZE), inst->src[1]); /* Copy src[1] to REG_SIZE bytes past the payload start. */
+
+ inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
+ inst->resize_sources(1); /* src[1] was copied into the payload above, so only src[0] remains a source. */
+ inst->base_mrf = payload.nr;
+ inst->header_size = 1;
+ inst->mlen = 1 + inst->exec_size / 8; /* NOTE(review): presumably the header register plus one register per 8 channels -- confirm. */
+ }
+}
+
bool
fs_visitor::lower_logical_sends()
{
@@ -4544,6 +4550,10 @@ fs_visitor::lower_logical_sends()
ibld.sample_mask_reg());
break;
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
+ lower_varying_pull_constant_logical_send(ibld, inst);
+ break;
+
default:
continue;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 062fcd54592..75759b7ba26 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -463,9 +463,9 @@ private:
struct brw_reg dst,
struct brw_reg surf_index,
struct brw_reg offset);
- void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset);
+ void generate_varying_pull_constant_load(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg index);
void generate_varying_pull_constant_load_gen7(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index b17a082780a..99121c503f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -72,8 +72,8 @@ is_expression(const fs_visitor *v, const fs_inst *const inst)
case BRW_OPCODE_MAD:
case BRW_OPCODE_LRP:
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
- case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
case FS_OPCODE_CINTERP:
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0a847f8637f..d979518c922 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1347,8 +1347,7 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
- struct brw_reg index,
- struct brw_reg offset)
+ struct brw_reg index)
{
assert(devinfo->gen < 7); /* Should use the gen7 variant. */
assert(inst->header_size != 0);
@@ -1380,10 +1379,6 @@ fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
}
- struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1),
- BRW_REGISTER_TYPE_D);
- brw_MOV(p, offset_mrf, offset);
-
struct brw_reg header = brw_vec8_grf(0, 0);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
@@ -2186,7 +2181,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
- generate_varying_pull_constant_load(inst, dst, src[0], src[1]);
+ generate_varying_pull_constant_load(inst, dst, src[0]);
break;
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d24db58a046..551d9c23926 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -348,6 +348,8 @@ brw_instruction_name(const struct brw_device_info *devinfo, enum opcode op)
return "varying_pull_const";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
return "varying_pull_const_gen7";
+ case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
+ return "varying_pull_const_logical";
case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
return "mov_dispatch_to_flags";