diff options
author | Jason Ekstrand <[email protected]> | 2017-09-28 16:25:31 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-10-12 22:39:29 -0700 |
commit | 2975e4c56a7aeade5a324aa4d446f18cc176fa06 (patch) | |
tree | f08787f03d0781b1d7823095acabf3e86d5522ec /src/intel/compiler | |
parent | faad828b16448c1008a1b15ac8d8a72b13005c09 (diff) |
intel: Rewrite the world of push/pull params
This moves us away from the array of pointers model and onto a model where
each param is represented by a generic uint32_t handle. We reserve 2^16
of these handles for builtins that get generated somewhere inside the
compiler and have well-defined meanings. Generic params have handles
whose meanings are defined by the driver.
The primary downside to this new approach is that it moves a little bit
of the work that we would normally do at compile time to draw time. On
my laptop this hurts OglBatch6 by no more than 1% and doesn't seem to
have any measurable effect on OglBatch7. So, while this may come back
to bite us, it doesn't look too bad.
Reviewed-by: Jordan Justen <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r-- | src/intel/compiler/brw_compiler.h | 73 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 12 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs.h | 6 | ||||
-rw-r--r-- | src/intel/compiler/brw_fs_visitor.cpp | 8 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4.cpp | 17 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_gs_visitor.cpp | 7 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_vs.h | 3 | ||||
-rw-r--r-- | src/intel/compiler/brw_vec4_vs_visitor.cpp | 4 |
8 files changed, 90 insertions, 40 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index 038f3f95512..f2f9be750a0 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -36,7 +36,6 @@ extern "C" { struct ra_regs; struct nir_shader; struct brw_program; -union gl_constant_value; struct brw_compiler { const struct gen_device_info *devinfo; @@ -491,6 +490,66 @@ struct brw_ubo_range uint8_t length; }; +/* We reserve the first 2^16 values for builtins */ +#define BRW_PARAM_IS_BUILTIN(param) (((param) & 0xffff0000) == 0) + +enum brw_param_builtin { + BRW_PARAM_BUILTIN_ZERO, + + BRW_PARAM_BUILTIN_CLIP_PLANE_0_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_0_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_0_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_0_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_1_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_2_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_3_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_4_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_5_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_6_W, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_X, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_Y, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_Z, + BRW_PARAM_BUILTIN_CLIP_PLANE_7_W, + + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X, + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y, + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Z, + BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W, + BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X, + 
BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y, +}; + +#define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \ + (BRW_PARAM_BUILTIN_CLIP_PLANE_0_X + ((idx) << 2) + (comp)) + +#define BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param) \ + ((param) >= BRW_PARAM_BUILTIN_CLIP_PLANE_0_X && \ + (param) <= BRW_PARAM_BUILTIN_CLIP_PLANE_7_W) + +#define BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param) \ + (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) >> 2) + +#define BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param) \ + (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) & 0x3) + struct brw_stage_prog_data { struct { /** size of our binding table. */ @@ -529,11 +588,14 @@ struct brw_stage_prog_data { bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */ - /* Pointers to tracked values (only valid once - * _mesa_load_state_parameters has been called at runtime). + /* 32-bit identifiers for all push/pull parameters. These can be anything + * the driver wishes them to be; the core of the back-end compiler simply + * re-arranges them. The one restriction is that the bottom 2^16 values + * are reserved for builtins defined in the brw_param_builtin enum defined + * above. */ - const union gl_constant_value **param; - const union gl_constant_value **pull_param; + uint32_t *param; + uint32_t *pull_param; /** Image metadata passed to the shader as uniforms. 
*/ struct brw_image_param *image_param; @@ -1020,7 +1082,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, const struct nir_shader *shader, - gl_clip_plane *clip_planes, bool use_legacy_snorm_formula, int shader_time_index, unsigned *final_assembly_size, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index a40b910c1a0..c1d67750a3a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2084,10 +2084,9 @@ fs_visitor::assign_constant_locations() /* As the uniforms are going to be reordered, take the data from a temporary * copy of the original param[]. */ - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - stage_prog_data->nr_params); + uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params); memcpy(param, stage_prog_data->param, - sizeof(gl_constant_value*) * stage_prog_data->nr_params); + sizeof(uint32_t) * stage_prog_data->nr_params); stage_prog_data->nr_params = num_push_constants; stage_prog_data->nr_pull_params = num_pull_constants; @@ -2115,8 +2114,7 @@ fs_visitor::assign_constant_locations() */ int new_thread_local_id_index = -1; for (unsigned int i = 0; i < uniforms; i++) { - const gl_constant_value *value = param[i]; - + uint32_t value = param[i]; if (pull_constant_loc[i] != -1) { stage_prog_data->pull_param[pull_constant_loc[i]] = value; } else if (push_constant_loc[i] != -1) { @@ -5967,7 +5965,7 @@ fs_visitor::allocate_registers(bool allow_spilling) } bool -fs_visitor::run_vs(gl_clip_plane *clip_planes) +fs_visitor::run_vs() { assert(stage == MESA_SHADER_VERTEX); @@ -5981,7 +5979,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) if (failed) return false; - compute_clip_distance(clip_planes); + compute_clip_distance(); emit_urb_writes(); diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index f1ba193de7e..20405750b78 100644 --- 
a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -80,8 +80,8 @@ public: fs_reg vgrf(const glsl_type *const type); void import_uniforms(fs_visitor *v); - void setup_uniform_clipplane_values(gl_clip_plane *clip_planes); - void compute_clip_distance(gl_clip_plane *clip_planes); + void setup_uniform_clipplane_values(); + void compute_clip_distance(); fs_inst *get_instruction_generating_reg(fs_inst *start, fs_inst *end, @@ -95,7 +95,7 @@ public: void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf); bool run_fs(bool allow_spilling, bool do_rep_send); - bool run_vs(gl_clip_plane *clip_planes); + bool run_vs(); bool run_tcs_single_patch(); bool run_tes(); bool run_gs(); diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 758c8bf44a9..4ee1d4e0022 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -465,7 +465,7 @@ fs_visitor::emit_fb_writes() } void -fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) +fs_visitor::setup_uniform_clipplane_values() { const struct brw_vs_prog_key *key = (const struct brw_vs_prog_key *) this->key; @@ -474,7 +474,7 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) this->userplane[i] = fs_reg(UNIFORM, uniforms); for (int j = 0; j < 4; ++j) { stage_prog_data->param[uniforms + j] = - (gl_constant_value *) &clip_planes[i][j]; + BRW_PARAM_BUILTIN_CLIP_PLANE(i, j); } uniforms += 4; } @@ -486,7 +486,7 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes) * This does nothing if the shader uses gl_ClipDistance or user clipping is * disabled altogether. 
*/ -void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) +void fs_visitor::compute_clip_distance() { struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data); const struct brw_vs_prog_key *key = @@ -518,7 +518,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes) if (outputs[clip_vertex].file == BAD_FILE) return; - setup_uniform_clipplane_values(clip_planes); + setup_uniform_clipplane_values(); const fs_builder abld = bld.annotate("user clip distances"); diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 9b9f5863721..8614886967c 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -698,10 +698,9 @@ vec4_visitor::pack_uniform_registers() /* As the uniforms are going to be reordered, take the data from a temporary * copy of the original param[]. */ - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - stage_prog_data->nr_params); + uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params); memcpy(param, stage_prog_data->param, - sizeof(gl_constant_value*) * stage_prog_data->nr_params); + sizeof(uint32_t) * stage_prog_data->nr_params); /* Now, figure out a packing of the live uniform vectors into our * push constants. Start with dvec{3,4} because they are aligned to @@ -907,7 +906,7 @@ vec4_visitor::move_push_constants_to_pull_constants() pull_constant_loc[i / 4] = -1; if (i >= max_uniform_components) { - const gl_constant_value **values = &stage_prog_data->param[i]; + uint32_t *values = &stage_prog_data->param[i]; /* Try to find an existing copy of this uniform in the pull * constants if it was part of an array access already. 
@@ -1764,11 +1763,10 @@ vec4_visitor::setup_uniforms(int reg) */ if (devinfo->gen < 6 && this->uniforms == 0) { stage_prog_data->param = - reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); + reralloc(NULL, stage_prog_data->param, uint32_t, 4); for (unsigned int i = 0; i < 4; i++) { unsigned int slot = this->uniforms * 4 + i; - static gl_constant_value zero = { 0.0 }; - stage_prog_data->param[slot] = &zero; + stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO; } this->uniforms++; @@ -2742,7 +2740,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *prog_data, const nir_shader *src_shader, - gl_clip_plane *clip_planes, bool use_legacy_snorm_formula, int shader_time_index, unsigned *final_assembly_size, @@ -2866,7 +2863,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base, NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ shader, 8, shader_time_index); - if (!v.run_vs(clip_planes)) { + if (!v.run_vs()) { if (error_str) *error_str = ralloc_strdup(mem_ctx, v.fail_msg); @@ -2895,7 +2892,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data, prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_vs_visitor v(compiler, log_data, key, prog_data, - shader, clip_planes, mem_ctx, + shader, mem_ctx, shader_time_index, use_legacy_snorm_formula); if (!v.run()) { if (error_str) diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp index a8e445c473c..5df6d562ce6 100644 --- a/src/intel/compiler/brw_vec4_gs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp @@ -890,10 +890,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * values. 
*/ const unsigned param_count = prog_data->base.base.nr_params; - gl_constant_value **param = ralloc_array(NULL, gl_constant_value*, - param_count); + uint32_t *param = ralloc_array(NULL, uint32_t, param_count); memcpy(param, prog_data->base.base.param, - sizeof(gl_constant_value*) * param_count); + sizeof(uint32_t) * param_count); if (v.run()) { /* Success! Backup is not needed */ @@ -910,7 +909,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, * FIXME: Could more variables be modified by this execution? */ memcpy(prog_data->base.base.param, param, - sizeof(gl_constant_value*) * param_count); + sizeof(uint32_t) * param_count); prog_data->base.base.nr_params = param_count; prog_data->base.base.nr_pull_params = 0; ralloc_free(param); diff --git a/src/intel/compiler/brw_vec4_vs.h b/src/intel/compiler/brw_vec4_vs.h index cd07e0e99de..b2a862fdbde 100644 --- a/src/intel/compiler/brw_vec4_vs.h +++ b/src/intel/compiler/brw_vec4_vs.h @@ -36,7 +36,6 @@ public: const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, const nir_shader *shader, - gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula); @@ -57,8 +56,6 @@ private: const struct brw_vs_prog_key *const key; struct brw_vs_prog_data * const vs_prog_data; - gl_clip_plane *clip_planes; - bool use_legacy_snorm_formula; }; diff --git a/src/intel/compiler/brw_vec4_vs_visitor.cpp b/src/intel/compiler/brw_vec4_vs_visitor.cpp index ad7f067ad60..86f365e2f0f 100644 --- a/src/intel/compiler/brw_vec4_vs_visitor.cpp +++ b/src/intel/compiler/brw_vec4_vs_visitor.cpp @@ -124,7 +124,7 @@ vec4_vs_visitor::setup_uniform_clipplane_values() this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { stage_prog_data->param[this->uniforms * 4 + j] = - (gl_constant_value *) &clip_planes[i][j]; + BRW_PARAM_BUILTIN_CLIP_PLANE(i, j); } ++this->uniforms; } @@ -164,7 +164,6 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, 
const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, const nir_shader *shader, - gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula) @@ -172,7 +171,6 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, mem_ctx, false /* no_spills */, shader_time_index), key(key), vs_prog_data(vs_prog_data), - clip_planes(clip_planes), use_legacy_snorm_formula(use_legacy_snorm_formula) { } |