diff options
author | Jason Ekstrand <[email protected]> | 2016-02-09 15:30:39 -0800 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2016-02-09 15:30:39 -0800 |
commit | 768bd7f272e0dfd8cc17c49750fe8aaab78bb420 (patch) | |
tree | 5d8e07b6539565cbfe9ebff5d77cde8b6a8bf566 /src/mesa/drivers/dri | |
parent | 4c5dcccfba3c9d0e5c7302aa797ad8d31f18cf52 (diff) | |
parent | 8b0fb1c152fe191768953aa8c77b89034a377f83 (diff) |
Merge commit '8b0fb1c152fe191768953aa8c77b89034a377f83' into vulkan
This pulls in Rob Clark's const_index changes for NIR
Diffstat (limited to 'src/mesa/drivers/dri')
-rw-r--r-- | src/mesa/drivers/dri/common/dri_util.c | 1 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp | 2 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_misc_state.c | 2 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir.c | 4 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_pipe_control.c | 2 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.c | 2 |
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 7 |
-rw-r--r-- | src/mesa/drivers/dri/i965/gen7_l3_state.c | 34 |
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_reg.h | 2 |
-rw-r--r-- | src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp | 14 |
-rw-r--r-- | src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp | 10 |
-rw-r--r-- | src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp | 10 |
12 files changed, 63 insertions, 27 deletions
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c index 5cfa2f8ca4f..a6545084e31 100644 --- a/src/mesa/drivers/dri/common/dri_util.c +++ b/src/mesa/drivers/dri/common/dri_util.c @@ -46,6 +46,7 @@ #include "main/mtypes.h" #include "main/framebuffer.h" #include "main/version.h" +#include "main/debug_output.h" #include "main/errors.h" #include "main/macros.h" diff --git a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp index f2faceeb579..9b0750026c4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_surface_builder.cpp @@ -1140,7 +1140,7 @@ namespace brw { dims, rsize, op, pred); /* An unbound surface access should give zero as result. */ - if (rsize) + if (rsize && pred) set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0))); return tmp; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 319c2a5669f..ab1a0d7255f 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -919,7 +919,7 @@ brw_emit_select_pipeline(struct brw_context *brw, enum brw_pipeline pipeline) * MI_PIPELINE_SELECT command to change the Pipeline Select Mode. */ const unsigned dc_flush = - brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_INVALIDATE : 0; + brw->gen >= 7 ? 
PIPE_CONTROL_DATA_CACHE_FLUSH : 0; if (brw->gen == 6) { /* Hardware workaround: SNB B-Spec says: diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index ef5b34cc687..0a916c99947 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -490,6 +490,10 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar) nir = nir_optimize(nir, is_scalar); + if (is_scalar) { + OPT_V(nir_lower_load_const_to_scalar); + } + /* Lower a bunch of stuff */ OPT_V(nir_lower_var_copies); diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index 6c636d26139..b41e28e1ec8 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -51,7 +51,7 @@ gen8_add_cs_stall_workaround_bits(uint32_t *flags) PIPE_CONTROL_WRITE_TIMESTAMP | PIPE_CONTROL_STALL_AT_SCOREBOARD | PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DATA_CACHE_INVALIDATE; + PIPE_CONTROL_DATA_CACHE_FLUSH; /* If we're doing a CS stall, and don't already have one of the * workaround bits set, add "Stall at Pixel Scoreboard." 
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index c9872b68d75..b093a87bb82 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -209,7 +209,7 @@ static void brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers) { struct brw_context *brw = brw_context(ctx); - unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE | + unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL); assert(brw->gen >= 7 && brw->gen <= 9); diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp index 60f7fd9cfcd..4f97577515a 100644 --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp @@ -939,8 +939,9 @@ fs_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { fs_inst *inst = (fs_inst *)n->inst; - if (inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || - inst->has_side_effects()) + if ((inst->opcode == FS_OPCODE_PLACEHOLDER_HALT || + inst->has_side_effects()) && + inst->opcode != FS_OPCODE_FB_WRITE) add_barrier_deps(n); /* read-after-write deps. */ @@ -1195,7 +1196,7 @@ vec4_instruction_scheduler::calculate_deps() foreach_in_list(schedule_node, n, &instructions) { vec4_instruction *inst = (vec4_instruction *)n->inst; - if (inst->has_side_effects()) + if (inst->has_side_effects() && inst->opcode != FS_OPCODE_FB_WRITE) add_barrier_deps(n); /* read-after-write deps. 
*/ diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c index b63e61ca8f0..0c1813f9048 100644 --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c @@ -330,23 +330,39 @@ setup_l3_config(struct brw_context *brw, const struct brw_l3_config *cfg) /* According to the hardware docs, the L3 partitioning can only be changed * while the pipeline is completely drained and the caches are flushed, - * which involves a first PIPE_CONTROL flush which stalls the pipeline and - * initiates invalidation of the relevant caches... + * which involves a first PIPE_CONTROL flush which stalls the pipeline... + */ + brw_emit_pipe_control_flush(brw, + PIPE_CONTROL_DATA_CACHE_FLUSH | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL); + + /* ...followed by a second pipelined PIPE_CONTROL that initiates + * invalidation of the relevant caches. Note that because RO invalidation + * happens at the top of the pipeline (i.e. right away as the PIPE_CONTROL + * command is processed by the CS) we cannot combine it with the previous + * stalling flush as the hardware documentation suggests, because that + * would cause the CS to stall on previous rendering *after* RO + * invalidation and wouldn't prevent the RO caches from being polluted by + * concurrent rendering before the stall completes. This intentionally + * doesn't implement the SKL+ hardware workaround suggesting to enable CS + * stall on PIPE_CONTROLs with the texture cache invalidation bit set for + * GPGPU workloads because the previous and subsequent PIPE_CONTROLs + * already guarantee that there is no concurrent GPGPU kernel execution + * (see SKL HSD 2132585). 
*/ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | PIPE_CONTROL_CONST_CACHE_INVALIDATE | PIPE_CONTROL_INSTRUCTION_INVALIDATE | - PIPE_CONTROL_DATA_CACHE_INVALIDATE | - PIPE_CONTROL_NO_WRITE | - PIPE_CONTROL_CS_STALL); + PIPE_CONTROL_STATE_CACHE_INVALIDATE | + PIPE_CONTROL_NO_WRITE); - /* ...followed by a second stalling flush which guarantees that - * invalidation is complete when the L3 configuration registers are - * modified. + /* Now send a third stalling flush to make sure that invalidation is + * complete when the L3 configuration registers are modified. */ brw_emit_pipe_control_flush(brw, - PIPE_CONTROL_DATA_CACHE_INVALIDATE | + PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL); diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 8888d6f776c..365c045b8b0 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -86,7 +86,7 @@ #define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) #define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */ /* GT */ -#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5) +#define PIPE_CONTROL_DATA_CACHE_FLUSH (1 << 5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) #define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3) #define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) diff --git a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp index e5e566c60bc..8d4a447a88b 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_cmod_propagation.cpp @@ -40,6 +40,7 @@ public: struct gl_context *ctx; struct gl_shader_program *shader_prog; struct brw_vertex_program *vp; + struct brw_vue_prog_data *prog_data; vec4_visitor *v; }; @@ -47,9 +48,13 @@ class cmod_propagation_vec4_visitor : public vec4_visitor { public: cmod_propagation_vec4_visitor(struct brw_compiler *compiler, - nir_shader *shader) 
- : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, - false, -1) {} + nir_shader *shader, + struct brw_vue_prog_data *prog_data) + : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL, + false, -1) + { + prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; + } protected: /* Dummy implementation for pure virtual methods */ @@ -96,13 +101,14 @@ void cmod_propagation_test::SetUp() ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data)); compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); - v = new cmod_propagation_vec4_visitor(compiler, shader); + v = new cmod_propagation_vec4_visitor(compiler, shader, prog_data); _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index 12667ffd23c..311f07a7cca 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -39,6 +39,7 @@ public: struct gl_context *ctx; struct gl_shader_program *shader_prog; struct brw_vertex_program *vp; + struct brw_vue_prog_data *prog_data; vec4_visitor *v; }; @@ -46,10 +47,12 @@ class copy_propagation_vec4_visitor : public vec4_visitor { public: copy_propagation_vec4_visitor(struct brw_compiler *compiler, - nir_shader *shader) - : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, + nir_shader *shader, + struct brw_vue_prog_data *prog_data) + : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL, false /* no_spills */, -1) { + prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; } protected: @@ -91,13 +94,14 @@ void copy_propagation_test::SetUp() ctx = (struct 
gl_context *)calloc(1, sizeof(*ctx)); compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data)); compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); - v = new copy_propagation_vec4_visitor(compiler, shader); + v = new copy_propagation_vec4_visitor(compiler, shader, prog_data); _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index 34dcf95dc48..cc4a2de89d5 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -41,6 +41,7 @@ public: struct gl_context *ctx; struct gl_shader_program *shader_prog; struct brw_vertex_program *vp; + struct brw_vue_prog_data *prog_data; vec4_visitor *v; }; @@ -49,10 +50,12 @@ class register_coalesce_vec4_visitor : public vec4_visitor { public: register_coalesce_vec4_visitor(struct brw_compiler *compiler, - nir_shader *shader) - : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, + nir_shader *shader, + struct brw_vue_prog_data *prog_data) + : vec4_visitor(compiler, NULL, NULL, prog_data, shader, NULL, false /* no_spills */, -1) { + prog_data->dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; } protected: @@ -94,13 +97,14 @@ void register_coalesce_test::SetUp() ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo)); + prog_data = (struct brw_vue_prog_data *)calloc(1, sizeof(*prog_data)); compiler->devinfo = devinfo; vp = ralloc(NULL, struct brw_vertex_program); nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); - v = new 
register_coalesce_vec4_visitor(compiler, shader); + v = new register_coalesce_vec4_visitor(compiler, shader, prog_data); _mesa_init_gl_program(&vp->program.Base, GL_VERTEX_SHADER, 0); |