summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Francisco Jerez <[email protected]> 2016-12-08 20:05:18 -0800
committer: Francisco Jerez <[email protected]> 2016-12-14 16:50:26 -0800
commit: 9b22a0d295316b7547667ebbfe1e1b6182439186 (patch)
tree: 16ad3ba1442bb9b0830796be623e885563778e82
parent: 7a6aadb76ff3f6ef73216b53b0dc5edda5bae978 (diff)
i965/fs: Expose arbitrary pull constant load sizes to the IR.
Change the FS generator to ask the dataport for enough owords' worth of constants to fill the execution size of the instruction. This means that the visitor now needs to set the execution size correctly for uniform pull constant load instructions, which we were kind of neglecting until now.

Reviewed-by: Kenneth Graunke <[email protected]>
-rw-r--r-- src/mesa/drivers/dri/i965/brw_eu_emit.c 15
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp 27
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp 9
4 files changed, 26 insertions, 27 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6141bfb99ea..8536a130725 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
}
/**
- * Read a float[4] vector from the data port constant cache.
+ * Read float[4] vectors from the data port constant cache.
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
@@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p,
const unsigned target_cache =
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+ const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
@@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
@@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
+ brw_pop_insn_state(p);
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
@@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p,
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
- brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ brw_set_dp_read_message(p, insn, bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
target_cache,
1, /* msg_length */
true, /* header_present */
- 1); /* response_length (1 reg, 2 owords!) */
+ DIV_ROUND_UP(exec_size, 8)); /* response_length */
brw_pop_insn_state(p);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b22dc9a1a7b..977fd8c35f5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads()
assert(inst->src[i].stride == 0);
- const fs_builder ubld = ibld.exec_all().group(8, 0);
+ const fs_builder ubld = ibld.exec_all().group(4, 0);
struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, brw_imm_ud(index), offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8b9fa8e504b..93f4c4199b3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
struct brw_reg index,
struct brw_reg offset)
{
+ assert(type_sz(dst.type) == 4);
assert(inst->mlen != 0);
assert(index.file == BRW_IMMEDIATE_VALUE &&
@@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
{
assert(index.type == BRW_REGISTER_TYPE_UD);
assert(payload.file == BRW_GENERAL_REGISTER_FILE);
+ assert(type_sz(dst.type) == 4);
if (index.file == BRW_IMMEDIATE_VALUE) {
const uint32_t surf_index = index.ud;
brw_push_insn_state(p);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_inst_set_exec_size(devinfo, send, BRW_EXECUTE_4);
brw_pop_insn_state(p);
- brw_set_dest(p, send, vec4(retype(dst, BRW_REGISTER_TYPE_UD)));
- brw_set_src0(p, send, vec4(retype(payload, BRW_REGISTER_TYPE_UD)));
- brw_set_dp_read_message(p, send,
- surf_index,
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UD));
+ brw_set_src0(p, send, retype(payload, BRW_REGISTER_TYPE_UD));
+ brw_set_dp_read_message(p, send, surf_index,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1, /* mlen */
true, /* header */
- 1); /* rlen */
+ DIV_ROUND_UP(inst->size_written, REG_SIZE));
} else {
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
@@ -1188,17 +1187,15 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
/* dst = send(payload, a0.0 | <descriptor>) */
brw_inst *insn = brw_send_indirect_message(
p, GEN6_SFID_DATAPORT_CONSTANT_CACHE,
- vec4(retype(dst, BRW_REGISTER_TYPE_UD)),
- vec4(retype(payload, BRW_REGISTER_TYPE_UD)), addr);
- brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
- brw_set_dp_read_message(p, insn,
- 0, /* surface */
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ retype(dst, BRW_REGISTER_TYPE_UD),
+ retype(payload, BRW_REGISTER_TYPE_UD), addr);
+ brw_set_dp_read_message(p, insn, 0 /* surface */,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(inst->exec_size),
GEN7_DATAPORT_DC_OWORD_BLOCK_READ,
GEN6_SFID_DATAPORT_CONSTANT_CACHE,
1, /* mlen */
true, /* header */
- 1); /* rlen */
+ DIV_ROUND_UP(inst->size_written, REG_SIZE));
brw_pop_insn_state(p);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index bfb286bb344..7df74232457 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4059,7 +4059,9 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
* and we have to split it if necessary.
*/
const unsigned type_size = type_sz(dest.type);
- const fs_reg packed_consts = bld.vgrf(BRW_REGISTER_TYPE_F);
+ const fs_builder ubld = bld.exec_all().group(4, 0);
+ const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
+
for (unsigned c = 0; c < instr->num_components;) {
const unsigned base = const_offset->u32[0] + c * type_size;
@@ -4067,9 +4069,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
const unsigned count = MIN2(instr->num_components - c,
(16 - base % 16) / type_size);
- bld.exec_all()
- .emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts, surf_index, brw_imm_ud(base & ~15));
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ packed_consts, surf_index, brw_imm_ud(base & ~15));
const fs_reg consts =
retype(byte_offset(packed_consts, base & 15), dest.type);