summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri
diff options
context:
space:
mode:
author Jason Ekstrand <[email protected]> 2016-02-09 15:30:39 -0800
committer Jason Ekstrand <[email protected]> 2016-02-09 15:30:39 -0800
commit 768bd7f272e0dfd8cc17c49750fe8aaab78bb420 (patch)
tree 5d8e07b6539565cbfe9ebff5d77cde8b6a8bf566 /src/mesa/drivers/dri
parent 4c5dcccfba3c9d0e5c7302aa797ad8d31f18cf52 (diff)
parent 8b0fb1c152fe191768953aa8c77b89034a377f83 (diff)
Merge commit '8b0fb1c152fe191768953aa8c77b89034a377f83' into vulkan
This pulls in Rob Clark's const_index changes for NIR
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- src/mesa/drivers/dri/common/dri_util.c 1
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_misc_state.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_nir.c 4
-rw-r--r-- src/mesa/drivers/dri/i965/brw_pipe_control.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_program.c 2
-rw-r--r-- src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp 7
-rw-r--r-- src/mesa/drivers/dri/i965/gen7_l3_state.c 34
-rw-r--r-- src/mesa/drivers/dri/i965/intel_reg.h 2
-rw-r--r-- src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp 14
-rw-r--r-- src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp 10
-rw-r--r-- src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp 10
12 files changed, 63 insertions, 27 deletions
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index 5cfa2f8ca4f..a6545084e31 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -46,6 +46,7 @@
#include "main/mtypes.h"
#include "main/framebuffer.h"
#include "main/version.h"
+#include "main/debug_output.h"
#include "main/errors.h"
#include "main/macros.h"
diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
index f2faceeb579..9b0750026c4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp
@@ -1140,7 +1140,7 @@ namespace brw {
dims, rsize, op, pred);
/* An unbound surface access should give zero as result. */
- if (rsize)
+ if (rsize && pred)
set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));
return tmp;
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 319c2a5669f..ab1a0d7255f 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -919,7 +919,7 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline)
* MI_PIPELINE_SELECT command to change the Pipeline Select Mode.
*/
const unsigned dc_flush =
- brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0;
+ brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;
if (brw->gen == 6) {
/* Hardware workaround: SNB B-Spec says:
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index ef5b34cc687..0a916c99947 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -490,6 +490,10 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
nir = nir_optimize(nir, is_scalar);
+ if (is_scalar) {
+ OPT_V(nir_lower_load_const_to_scalar);
+ }
+
/* Lower a bunch of stuff */
OPT_V(nir_lower_var_copies);
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 6c636d26139..b41e28e1ec8 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -51,7 +51,7 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags)
PIPE_CONTROL_WRITE_TIMESTAMP |
PIPE_CONTROL_STALL_AT_SCOREBOARD |
PIPE_CONTROL_DEPTH_STALL |
- PIPE_CONTROL_DATA_CACHE_INVALIDATE;
+ PIPE_CONTROL_DATA_CACHE_FLUSH;
/* If we're doing a CS stall, and don't already have one of the
* workaround bits set, add "Stall at Pixel Scoreboard."
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index c9872b68d75..b093a87bb82 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -209,7 +209,7 @@ static void
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
struct brw_context *brw = brw_context(ctx);
- unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+ unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_NO_WRITE |
PIPE_CONTROL_CS_STALL);
assert(brw->gen >= 7 && brw->gen <= 9);
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 60f7fd9cfcd..4f97577515a 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -939,8 +939,9 @@ fs_instruction_scheduler::calculate_deps()
foreach_in_list(schedule_node, n, &instructions) {
fs_inst *inst = (fs_inst *)n->inst;
- if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
- inst->has_side_effects())
+ if ((inst->opcode == FS_OPCODE_PLACEHOLDER_HALT ||
+ inst->has_side_effects()) &&
+ inst->opcode != FS_OPCODE_FB_WRITE)
add_barrier_deps(n);
/* read-after-write deps. */
@@ -1195,7 +1196,7 @@ vec4_instruction_scheduler::calculate_deps()
foreach_in_list(schedule_node, n, &instructions) {
vec4_instruction *inst = (vec4_instruction *)n->inst;
- if (inst->has_side_effects())
+ if (inst->has_side_effects() && inst->opcode != FS_OPCODE_FB_WRITE)
add_barrier_deps(n);
/* read-after-write deps. */
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index b63e61ca8f0..0c1813f9048 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -330,23 +330,39 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg)
/* According to the hardware docs, the L3 partitioning can only be changed
* while the pipeline is completely drained and the caches are flushed,
- * which involves a first PIPE_CONTROL flush which stalls the pipeline and
- * initiates invalidation of the relevant caches...
+ * which involves a first PIPE_CONTROL flush which stalls the pipeline...
+ */
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_DATA_CACHE_FLUSH |
+ PIPE_CONTROL_NO_WRITE |
+ PIPE_CONTROL_CS_STALL);
+
+ /* ...followed by a second pipelined PIPE_CONTROL that initiates
+ * invalidation of the relevant caches. Note that because RO invalidation
+ * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL
+ * command is processed by the CS) we cannot combine it with the previous
+ * stalling flush as the hardware documentation suggests, because that
+ * would cause the CS to stall on previous rendering *after* RO
+ * invalidation and wouldn't prevent the RO caches from being polluted by
+ * concurrent rendering before the stall completes. This intentionally
+ * doesn't implement the SKL+ hardware workaround suggesting to enable CS
+ * stall on PIPE_CONTROLs with the texture cache invalidation bit set for
+ * GPGPU workloads because the previous and subsequent PIPE_CONTROLs
+ * already guarantee that there is no concurrent GPGPU kernel execution
+ * (see SKL HSD 2132585).
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_CONST_CACHE_INVALIDATE |
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
- PIPE_CONTROL_DATA_CACHE_INVALIDATE |
- PIPE_CONTROL_NO_WRITE |
- PIPE_CONTROL_CS_STALL);
+ PIPE_CONTROL_STATE_CACHE_INVALIDATE |
+ PIPE_CONTROL_NO_WRITE);
- /* ...followed by a second stalling flush which guarantees that
- * invalidation is complete when the L3 configuration registers are
- * modified.
+ /* Now send a third stalling flush to make sure that invalidation is
+ * complete when the L3 configuration registers are modified.
*/
brw_emit_pipe_control_flush(brw,
- PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+ PIPE_CONTROL_DATA_CACHE_FLUSH |
PIPE_CONTROL_NO_WRITE |
PIPE_CONTROL_CS_STALL);
diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h
index 8888d6f776c..365c045b8b0 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -86,7 +86,7 @@
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
/* GT */
-#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5)
+#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5)
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
index e5e566c60bc..8d4a447a88b 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp
@@ -40,6 +40,7 @@ public:
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
struct brw_vertex_program *vp;
+ struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
@@ -47,9 +48,13 @@ class cmod_propagation_vec4_visitor : public vec4_visitor
{
public:
cmod_propagation_vec4_visitor(struct brw_compiler *compiler,
- nir_shader *shader)
- : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
- false, -1) {}
+ nir_shader *shader,
+ struct brw_vue_prog_data *prog_data)
+ : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
+ false, -1)
+ {
+ prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+ }
protected:
/* Dummy implementation for pure virtual methods */
@@ -96,13 +101,14 @@ void cmod_propagation_test::SetUp()
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
compiler->devinfo = devinfo;
vp = ralloc(NULL, struct brw_vertex_program);
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
- v = new cmod_propagation_vec4_visitor(compiler, shader);
+ v = new cmod_propagation_vec4_visitor(compiler, shader, prog_data);
_mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
index 12667ffd23c..311f07a7cca 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp
@@ -39,6 +39,7 @@ public:
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
struct brw_vertex_program *vp;
+ struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
@@ -46,10 +47,12 @@ class copy_propagation_vec4_visitor : public vec4_visitor
{
public:
copy_propagation_vec4_visitor(struct brw_compiler *compiler,
- nir_shader *shader)
- : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+ nir_shader *shader,
+ struct brw_vue_prog_data *prog_data)
+ : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
false /* no_spills */, -1)
{
+ prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
}
protected:
@@ -91,13 +94,14 @@ void copy_propagation_test::SetUp()
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
compiler->devinfo = devinfo;
vp = ralloc(NULL, struct brw_vertex_program);
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
- v = new copy_propagation_vec4_visitor(compiler, shader);
+ v = new copy_propagation_vec4_visitor(compiler, shader, prog_data);
_mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);
diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
index 34dcf95dc48..cc4a2de89d5 100644
--- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
+++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp
@@ -41,6 +41,7 @@ public:
struct gl_context *ctx;
struct gl_shader_program *shader_prog;
struct brw_vertex_program *vp;
+ struct brw_vue_prog_data *prog_data;
vec4_visitor *v;
};
@@ -49,10 +50,12 @@ class register_coalesce_vec4_visitor : public vec4_visitor
{
public:
register_coalesce_vec4_visitor(struct brw_compiler *compiler,
- nir_shader *shader)
- : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
+ nir_shader *shader,
+ struct brw_vue_prog_data *prog_data)
+ : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL,
false /* no_spills */, -1)
{
+ prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
}
protected:
@@ -94,13 +97,14 @@ void register_coalesce_test::SetUp()
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
+ prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data));
compiler->devinfo = devinfo;
vp = ralloc(NULL, struct brw_vertex_program);
nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
- v = new register_coalesce_vec4_visitor(compiler, shader);
+ v = new register_coalesce_vec4_visitor(compiler, shader, prog_data);
_mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0);