summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.h8
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp75
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_visitor.cpp32
5 files changed, 114 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 40571a4d54d..ab206d1920f 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -676,10 +676,12 @@ enum opcode {
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
+ FS_OPCODE_SET_GLOBAL_OFFSET,
VS_OPCODE_URB_WRITE,
VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9a18410ac5f..83128117328 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -330,7 +330,9 @@ fs_inst::is_math()
bool
fs_inst::is_send_from_grf()
{
- return opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
+ return (opcode == FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7 ||
+ (opcode == FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD &&
+ src[1].file == GRF));
}
bool
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index b75314cd665..87257123f27 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -529,6 +529,10 @@ private:
void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index,
struct brw_reg offset);
+ void generate_uniform_pull_constant_load_gen7(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index,
+ struct brw_reg offset);
void generate_varying_pull_constant_load(fs_inst *inst, struct brw_reg dst,
struct brw_reg index);
void generate_varying_pull_constant_load_gen7(fs_inst *inst,
@@ -536,6 +540,10 @@ private:
struct brw_reg index,
struct brw_reg offset);
void generate_mov_dispatch_to_flags(fs_inst *inst);
+ void generate_set_global_offset(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg offset);
void generate_discard_jump(fs_inst *inst);
void patch_discard_jumps_to_fb_writes();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 9a891414e62..63f09fe7941 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -666,6 +666,44 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst,
}
void
+fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg index,
+ struct brw_reg offset)
+{
+ assert(inst->mlen == 0);
+
+ assert(index.file == BRW_IMMEDIATE_VALUE &&
+ index.type == BRW_REGISTER_TYPE_UD);
+ uint32_t surf_index = index.dw1.ud;
+
+ assert(offset.file == BRW_GENERAL_REGISTER_FILE);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_pop_insn_state(p);
+
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, offset);
+ if (intel->gen < 6)
+ send->header.destreg__conditionalmod = inst->base_mrf;
+
+ uint32_t msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
+ uint32_t msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
+ bool header_present = true;
+ brw_set_dp_read_message(p, send,
+ surf_index,
+ msg_control,
+ msg_type,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 1,
+ header_present,
+ 1);
+}
+
+void
fs_generator::generate_varying_pull_constant_load(fs_inst *inst,
struct brw_reg dst,
struct brw_reg index)
@@ -852,6 +890,35 @@ brw_reg_from_fs_reg(fs_reg *reg)
return brw_reg;
}
+/**
+ * Sets the second dword of a vgrf for gen7+ message setup.
+ *
+ * For setting up gen7 messages in VGRFs, we need to be able to set the second
+ * dword for some payloads where in the MRF world we'd have just used
+ * brw_message_reg(). We don't want to bake it into the send message's code
+ * generation because that means we don't get a chance to schedule the
+ * instructions.
+ */
+void
+fs_generator::generate_set_global_offset(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg value)
+{
+ /* We use a matching src and dst to get the information on how this
+ * instruction works exposed to various optimization passes that would
+ * otherwise treat it as completely overwriting the dst.
+ */
+ assert(src.file == dst.file && src.nr == dst.nr);
+ assert(value.file == BRW_IMMEDIATE_VALUE);
+
+ brw_push_insn_state(p);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 2), value.type), value);
+ brw_pop_insn_state(p);
+}
+
void
fs_generator::generate_code(exec_list *instructions)
{
@@ -1127,6 +1194,10 @@ fs_generator::generate_code(exec_list *instructions)
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
break;
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]);
+ break;
+
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD:
generate_varying_pull_constant_load(inst, dst, src[0]);
break;
@@ -1151,6 +1222,10 @@ fs_generator::generate_code(exec_list *instructions)
brw_shader_time_add(p, inst->base_mrf, SURF_INDEX_WM_SHADER_TIME);
break;
+ case FS_OPCODE_SET_GLOBAL_OFFSET:
+ generate_set_global_offset(inst, dst, src[0], src[1]);
+ break;
+
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
_mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index ccf905ebc62..6a39f98509e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -581,12 +581,32 @@ fs_visitor::visit(ir_expression *ir)
if (const_offset) {
fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
packed_consts.type = result.type;
- fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- packed_consts,
- surf_index,
- fs_reg(const_offset->value.u[0])));
- pull->base_mrf = 14;
- pull->mlen = 1;
+
+ if (intel->gen >= 7) {
+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] / 16);
+ fs_reg payload = fs_reg(this, glsl_type::uint_type);
+ struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
+ BRW_REGISTER_TYPE_UD);
+ fs_inst *setup = emit(MOV(payload, fs_reg(g0)));
+ setup->force_writemask_all = true;
+ /* We don't need the second half of this vgrf to be filled with g1
+ * in the 16-wide case, but if we use force_uncompressed then live
+ * variable analysis won't consider this a def!
+ */
+
+ emit(FS_OPCODE_SET_GLOBAL_OFFSET, payload,
+ payload, const_offset_reg);
+ emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, packed_consts,
+ surf_index, payload);
+ } else {
+ fs_reg const_offset_reg = fs_reg(const_offset->value.u[0]);
+ fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ packed_consts,
+ surf_index,
+ const_offset_reg));
+ pull->base_mrf = 14;
+ pull->mlen = 1;
+ }
packed_consts.smear = const_offset->value.u[0] % 16 / 4;
for (int i = 0; i < ir->type->vector_elements; i++) {