summaryrefslogtreecommitdiffstats
path: root/src/intel/compiler
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2017-09-28 16:25:31 -0700
committerJason Ekstrand <[email protected]>2017-10-12 22:39:29 -0700
commit2975e4c56a7aeade5a324aa4d446f18cc176fa06 (patch)
treef08787f03d0781b1d7823095acabf3e86d5522ec /src/intel/compiler
parentfaad828b16448c1008a1b15ac8d8a72b13005c09 (diff)
intel: Rewrite the world of push/pull params
This moves us away from the array of pointers model and onto a model where each param is represented by a generic uint32_t handle. We reserve 2^16 of these handles for builtins that get generated somewhere inside the compiler and have well-defined meanings. Generic params have handles whose meanings are defined by the driver. The primary downside to this new approach is that it moves a little bit of the work that we would normally do at compile time to draw time. On my laptop this hurts OglBatch6 by no more than 1% and doesn't seem to have any measurable effect on OglBatch7. So, while this may come back to bite us, it doesn't look too bad. Reviewed-by: Jordan Justen <[email protected]> Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/intel/compiler')
-rw-r--r--src/intel/compiler/brw_compiler.h73
-rw-r--r--src/intel/compiler/brw_fs.cpp12
-rw-r--r--src/intel/compiler/brw_fs.h6
-rw-r--r--src/intel/compiler/brw_fs_visitor.cpp8
-rw-r--r--src/intel/compiler/brw_vec4.cpp17
-rw-r--r--src/intel/compiler/brw_vec4_gs_visitor.cpp7
-rw-r--r--src/intel/compiler/brw_vec4_vs.h3
-rw-r--r--src/intel/compiler/brw_vec4_vs_visitor.cpp4
8 files changed, 90 insertions, 40 deletions
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 038f3f95512..f2f9be750a0 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -36,7 +36,6 @@ extern "C" {
struct ra_regs;
struct nir_shader;
struct brw_program;
-union gl_constant_value;
struct brw_compiler {
const struct gen_device_info *devinfo;
@@ -491,6 +490,66 @@ struct brw_ubo_range
uint8_t length;
};
+/* We reserve the first 2^16 values for builtins */
+#define BRW_PARAM_IS_BUILTIN(param) (((param) & 0xffff0000) == 0)
+
+enum brw_param_builtin {
+ BRW_PARAM_BUILTIN_ZERO,
+
+ BRW_PARAM_BUILTIN_CLIP_PLANE_0_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_0_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_0_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_0_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_1_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_1_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_1_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_1_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_2_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_2_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_2_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_2_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_3_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_3_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_3_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_3_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_4_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_4_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_4_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_4_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_5_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_5_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_5_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_5_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_6_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_6_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_6_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_6_W,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_7_X,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_7_Y,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_7_Z,
+ BRW_PARAM_BUILTIN_CLIP_PLANE_7_W,
+
+ BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X,
+ BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y,
+ BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Z,
+ BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W,
+ BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X,
+ BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y,
+};
+
+#define BRW_PARAM_BUILTIN_CLIP_PLANE(idx, comp) \
+ (BRW_PARAM_BUILTIN_CLIP_PLANE_0_X + ((idx) << 2) + (comp))
+
+#define BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param) \
+ ((param) >= BRW_PARAM_BUILTIN_CLIP_PLANE_0_X && \
+ (param) <= BRW_PARAM_BUILTIN_CLIP_PLANE_7_W)
+
+#define BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param) \
+ (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) >> 2)
+
+#define BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param) \
+ (((param) - BRW_PARAM_BUILTIN_CLIP_PLANE_0_X) & 0x3)
+
struct brw_stage_prog_data {
struct {
/** size of our binding table. */
@@ -529,11 +588,14 @@ struct brw_stage_prog_data {
bool use_alt_mode; /**< Use ALT floating point mode? Otherwise, IEEE. */
- /* Pointers to tracked values (only valid once
- * _mesa_load_state_parameters has been called at runtime).
+ /* 32-bit identifiers for all push/pull parameters. These can be anything
+ * the driver wishes them to be; the core of the back-end compiler simply
+ * re-arranges them. The one restriction is that the bottom 2^16 values
+ * are reserved for builtins defined in the brw_param_builtin enum defined
+ * above.
*/
- const union gl_constant_value **param;
- const union gl_constant_value **pull_param;
+ uint32_t *param;
+ uint32_t *pull_param;
/** Image metadata passed to the shader as uniforms. */
struct brw_image_param *image_param;
@@ -1020,7 +1082,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *prog_data,
const struct nir_shader *shader,
- gl_clip_plane *clip_planes,
bool use_legacy_snorm_formula,
int shader_time_index,
unsigned *final_assembly_size,
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index a40b910c1a0..c1d67750a3a 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2084,10 +2084,9 @@ fs_visitor::assign_constant_locations()
/* As the uniforms are going to be reordered, take the data from a temporary
* copy of the original param[].
*/
- gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
- stage_prog_data->nr_params);
+ uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params);
memcpy(param, stage_prog_data->param,
- sizeof(gl_constant_value*) * stage_prog_data->nr_params);
+ sizeof(uint32_t) * stage_prog_data->nr_params);
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->nr_pull_params = num_pull_constants;
@@ -2115,8 +2114,7 @@ fs_visitor::assign_constant_locations()
*/
int new_thread_local_id_index = -1;
for (unsigned int i = 0; i < uniforms; i++) {
- const gl_constant_value *value = param[i];
-
+ uint32_t value = param[i];
if (pull_constant_loc[i] != -1) {
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
} else if (push_constant_loc[i] != -1) {
@@ -5967,7 +5965,7 @@ fs_visitor::allocate_registers(bool allow_spilling)
}
bool
-fs_visitor::run_vs(gl_clip_plane *clip_planes)
+fs_visitor::run_vs()
{
assert(stage == MESA_SHADER_VERTEX);
@@ -5981,7 +5979,7 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
if (failed)
return false;
- compute_clip_distance(clip_planes);
+ compute_clip_distance();
emit_urb_writes();
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index f1ba193de7e..20405750b78 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -80,8 +80,8 @@ public:
fs_reg vgrf(const glsl_type *const type);
void import_uniforms(fs_visitor *v);
- void setup_uniform_clipplane_values(gl_clip_plane *clip_planes);
- void compute_clip_distance(gl_clip_plane *clip_planes);
+ void setup_uniform_clipplane_values();
+ void compute_clip_distance();
fs_inst *get_instruction_generating_reg(fs_inst *start,
fs_inst *end,
@@ -95,7 +95,7 @@ public:
void DEP_RESOLVE_MOV(const brw::fs_builder &bld, int grf);
bool run_fs(bool allow_spilling, bool do_rep_send);
- bool run_vs(gl_clip_plane *clip_planes);
+ bool run_vs();
bool run_tcs_single_patch();
bool run_tes();
bool run_gs();
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 758c8bf44a9..4ee1d4e0022 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -465,7 +465,7 @@ fs_visitor::emit_fb_writes()
}
void
-fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
+fs_visitor::setup_uniform_clipplane_values()
{
const struct brw_vs_prog_key *key =
(const struct brw_vs_prog_key *) this->key;
@@ -474,7 +474,7 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
this->userplane[i] = fs_reg(UNIFORM, uniforms);
for (int j = 0; j < 4; ++j) {
stage_prog_data->param[uniforms + j] =
- (gl_constant_value *) &clip_planes[i][j];
+ BRW_PARAM_BUILTIN_CLIP_PLANE(i, j);
}
uniforms += 4;
}
@@ -486,7 +486,7 @@ fs_visitor::setup_uniform_clipplane_values(gl_clip_plane *clip_planes)
* This does nothing if the shader uses gl_ClipDistance or user clipping is
* disabled altogether.
*/
-void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
+void fs_visitor::compute_clip_distance()
{
struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
const struct brw_vs_prog_key *key =
@@ -518,7 +518,7 @@ void fs_visitor::compute_clip_distance(gl_clip_plane *clip_planes)
if (outputs[clip_vertex].file == BAD_FILE)
return;
- setup_uniform_clipplane_values(clip_planes);
+ setup_uniform_clipplane_values();
const fs_builder abld = bld.annotate("user clip distances");
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 9b9f5863721..8614886967c 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -698,10 +698,9 @@ vec4_visitor::pack_uniform_registers()
/* As the uniforms are going to be reordered, take the data from a temporary
* copy of the original param[].
*/
- gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
- stage_prog_data->nr_params);
+ uint32_t *param = ralloc_array(NULL, uint32_t, stage_prog_data->nr_params);
memcpy(param, stage_prog_data->param,
- sizeof(gl_constant_value*) * stage_prog_data->nr_params);
+ sizeof(uint32_t) * stage_prog_data->nr_params);
/* Now, figure out a packing of the live uniform vectors into our
* push constants. Start with dvec{3,4} because they are aligned to
@@ -907,7 +906,7 @@ vec4_visitor::move_push_constants_to_pull_constants()
pull_constant_loc[i / 4] = -1;
if (i >= max_uniform_components) {
- const gl_constant_value **values = &stage_prog_data->param[i];
+ uint32_t *values = &stage_prog_data->param[i];
/* Try to find an existing copy of this uniform in the pull
* constants if it was part of an array access already.
@@ -1764,11 +1763,10 @@ vec4_visitor::setup_uniforms(int reg)
*/
if (devinfo->gen < 6 && this->uniforms == 0) {
stage_prog_data->param =
- reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
+ reralloc(NULL, stage_prog_data->param, uint32_t, 4);
for (unsigned int i = 0; i < 4; i++) {
unsigned int slot = this->uniforms * 4 + i;
- static gl_constant_value zero = { 0.0 };
- stage_prog_data->param[slot] = &zero;
+ stage_prog_data->param[slot] = BRW_PARAM_BUILTIN_ZERO;
}
this->uniforms++;
@@ -2742,7 +2740,6 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *prog_data,
const nir_shader *src_shader,
- gl_clip_plane *clip_planes,
bool use_legacy_snorm_formula,
int shader_time_index,
unsigned *final_assembly_size,
@@ -2866,7 +2863,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
fs_visitor v(compiler, log_data, mem_ctx, key, &prog_data->base.base,
NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
shader, 8, shader_time_index);
- if (!v.run_vs(clip_planes)) {
+ if (!v.run_vs()) {
if (error_str)
*error_str = ralloc_strdup(mem_ctx, v.fail_msg);
@@ -2895,7 +2892,7 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_vs_visitor v(compiler, log_data, key, prog_data,
- shader, clip_planes, mem_ctx,
+ shader, mem_ctx,
shader_time_index, use_legacy_snorm_formula);
if (!v.run()) {
if (error_str)
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index a8e445c473c..5df6d562ce6 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -890,10 +890,9 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
* values.
*/
const unsigned param_count = prog_data->base.base.nr_params;
- gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
- param_count);
+ uint32_t *param = ralloc_array(NULL, uint32_t, param_count);
memcpy(param, prog_data->base.base.param,
- sizeof(gl_constant_value*) * param_count);
+ sizeof(uint32_t) * param_count);
if (v.run()) {
/* Success! Backup is not needed */
@@ -910,7 +909,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
* FIXME: Could more variables be modified by this execution?
*/
memcpy(prog_data->base.base.param, param,
- sizeof(gl_constant_value*) * param_count);
+ sizeof(uint32_t) * param_count);
prog_data->base.base.nr_params = param_count;
prog_data->base.base.nr_pull_params = 0;
ralloc_free(param);
diff --git a/src/intel/compiler/brw_vec4_vs.h b/src/intel/compiler/brw_vec4_vs.h
index cd07e0e99de..b2a862fdbde 100644
--- a/src/intel/compiler/brw_vec4_vs.h
+++ b/src/intel/compiler/brw_vec4_vs.h
@@ -36,7 +36,6 @@ public:
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
const nir_shader *shader,
- gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula);
@@ -57,8 +56,6 @@ private:
const struct brw_vs_prog_key *const key;
struct brw_vs_prog_data * const vs_prog_data;
- gl_clip_plane *clip_planes;
-
bool use_legacy_snorm_formula;
};
diff --git a/src/intel/compiler/brw_vec4_vs_visitor.cpp b/src/intel/compiler/brw_vec4_vs_visitor.cpp
index ad7f067ad60..86f365e2f0f 100644
--- a/src/intel/compiler/brw_vec4_vs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_vs_visitor.cpp
@@ -124,7 +124,7 @@ vec4_vs_visitor::setup_uniform_clipplane_values()
this->userplane[i].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
stage_prog_data->param[this->uniforms * 4 + j] =
- (gl_constant_value *) &clip_planes[i][j];
+ BRW_PARAM_BUILTIN_CLIP_PLANE(i, j);
}
++this->uniforms;
}
@@ -164,7 +164,6 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
const nir_shader *shader,
- gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula)
@@ -172,7 +171,6 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler,
mem_ctx, false /* no_spills */, shader_time_index),
key(key),
vs_prog_data(vs_prog_data),
- clip_planes(clip_planes),
use_legacy_snorm_formula(use_legacy_snorm_formula)
{
}