diff options
author | Jason Ekstrand <[email protected]> | 2017-09-28 16:25:31 -0700 |
---|---|---|
committer | Jason Ekstrand <[email protected]> | 2017-10-12 22:39:29 -0700 |
commit | 2975e4c56a7aeade5a324aa4d446f18cc176fa06 (patch) | |
tree | f08787f03d0781b1d7823095acabf3e86d5522ec /src/mesa | |
parent | faad828b16448c1008a1b15ac8d8a72b13005c09 (diff) |
intel: Rewrite the world of push/pull params
This moves us away from the array of pointers model and onto a model where
each param is represented by a generic uint32_t handle. We reserve 2^16
of these handles for builtins that get generated somewhere inside the
compiler and have well-defined meanings. Generic params have handles
whose meanings are defined by the driver.
The primary downside to this new approach is that it moves a little bit
of the work that we would normally do at compile time to draw time. On
my laptop this hurts OglBatch6 by no more than 1% and doesn't seem to
have any measurable effect on OglBatch7. So, while this may come back
to bite us, it doesn't look too bad.
Reviewed-by: Jordan Justen <[email protected]>
Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_cs.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_gs.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 69 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_program.h | 23 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_state.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_tcs.c | 25 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_tes.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vs.c | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/gen6_constant_state.c | 87 |
10 files changed, 155 insertions, 82 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index bc09abd912c..68fca098f76 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -89,10 +89,8 @@ brw_codegen_cs_prog(struct brw_context *brw, /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; - prog_data.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.image_param = rzalloc_array(NULL, struct brw_image_param, cp->program.info.num_images); diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index e7062ee7899..c040665a3b6 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -99,10 +99,8 @@ brw_codegen_gs_prog(struct brw_context *brw, */ int param_count = gp->program.nir->num_uniforms / 4; - prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.image_param = rzalloc_array(NULL, struct brw_image_param, gp->program.info.num_images); diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index f0bccac14ef..de6df4835d4 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -23,6 +23,7 @@ #include "compiler/brw_nir.h" #include "compiler/glsl/ir_uniform.h" +#include "brw_program.h" static void 
brw_nir_setup_glsl_builtin_uniform(nir_variable *var, @@ -60,23 +61,21 @@ brw_nir_setup_glsl_builtin_uniform(nir_variable *var, last_swiz = swiz; stage_prog_data->param[uniform_index++] = - &prog->Parameters->ParameterValues[index][swiz]; + BRW_PARAM_PARAMETER(index, swiz); } } } static void -setup_vec4_uniform_value(const gl_constant_value **params, - const gl_constant_value *values, - unsigned n) +setup_vec4_image_param(uint32_t *params, uint32_t idx, + unsigned offset, unsigned n) { - static const gl_constant_value zero = { 0 }; - + assert(offset % sizeof(uint32_t) == 0); for (unsigned i = 0; i < n; ++i) - params[i] = &values[i]; + params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i); for (unsigned i = n; i < 4; ++i) - params[i] = &zero; + params[i] = BRW_PARAM_BUILTIN_ZERO; } static void @@ -85,29 +84,32 @@ brw_setup_image_uniform_values(gl_shader_stage stage, unsigned param_start_index, const gl_uniform_storage *storage) { - const gl_constant_value **param = - &stage_prog_data->param[param_start_index]; + uint32_t *param = &stage_prog_data->param[param_start_index]; for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { const unsigned image_idx = storage->opaque[stage].index + i; - const brw_image_param *image_param = - &stage_prog_data->image_param[image_idx]; /* Upload the brw_image_param structure. The order is expected to match * the BRW_IMAGE_PARAM_*_OFFSET defines. 
*/ - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const gl_constant_value *)&image_param->surface_idx, 1); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const gl_constant_value *)image_param->offset, 2); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const gl_constant_value *)image_param->size, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const gl_constant_value *)image_param->stride, 4); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, - (const gl_constant_value *)image_param->tiling, 3); - setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const gl_constant_value *)image_param->swizzling, 2); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + image_idx, + offsetof(brw_image_param, surface_idx), 1); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + image_idx, + offsetof(brw_image_param, offset), 2); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + image_idx, + offsetof(brw_image_param, size), 3); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + image_idx, + offsetof(brw_image_param, stride), 4); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET, + image_idx, + offsetof(brw_image_param, tiling), 3); + setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + image_idx, + offsetof(brw_image_param, swizzling), 2); param += BRW_IMAGE_PARAM_SIZE; brw_mark_surface_used( @@ -167,14 +169,16 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, for (unsigned s = 0; s < vector_count; s++) { unsigned i; for (i = 0; i < vector_size; i++) { - stage_prog_data->param[uniform_index++] = components++; + uint32_t idx = components - prog->sh.data->UniformDataSlots; + stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx); + components++; } if (!is_scalar) { /* Pad out with zeros if needed (only needed for vec4) */ for (; i < 
max_vector_size; i++) { - static const gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniform_index++] = &zero; + stage_prog_data->param[uniform_index++] = + BRW_PARAM_BUILTIN_ZERO; } } } @@ -223,12 +227,9 @@ brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, assert(plist->Parameters[p].Size <= 4); unsigned i; - for (i = 0; i < plist->Parameters[p].Size; i++) { - stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i]; - } - for (; i < 4; i++) { - static const gl_constant_value zero = { 0.0 }; - stage_prog_data->param[4 * p + i] = &zero; - } + for (i = 0; i < plist->Parameters[p].Size; i++) + stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i); + for (; i < 4; i++) + stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO; } } diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index c52193c691c..701b8da482e 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -33,6 +33,29 @@ extern "C" { struct brw_context; +enum brw_param_domain { + BRW_PARAM_DOMAIN_BUILTIN = 0, + BRW_PARAM_DOMAIN_PARAMETER, + BRW_PARAM_DOMAIN_UNIFORM, + BRW_PARAM_DOMAIN_IMAGE, +}; + +#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) +#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) +#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) + +#define BRW_PARAM_PARAMETER(idx, comp) \ + BRW_PARAM(PARAMETER, ((idx) << 2) | (comp)) +#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2) +#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3) + +#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx)) +#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param) + +#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) +#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) +#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf) + struct 
nir_shader *brw_create_nir(struct brw_context *brw, const struct gl_shader_program *shader_prog, struct gl_program *prog, diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 42769b1deda..64acc1d9bf7 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -239,7 +239,7 @@ brw_populate_constant_data(struct brw_context *brw, const struct gl_program *prog, const struct brw_stage_prog_data *prog_data, void *dst, - const union gl_constant_value **param, + const uint32_t *param, unsigned nr_params); void brw_upload_pull_constants(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c index 474787b0ea6..272545453a4 100644 --- a/src/mesa/drivers/dri/i965/brw_tcs.c +++ b/src/mesa/drivers/dri/i965/brw_tcs.c @@ -188,10 +188,8 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, */ int param_count = nir->num_uniforms / 4; - prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.nr_params = param_count; if (tcp) { @@ -211,26 +209,25 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp, /* Upload the Patch URB Header as the first two uniforms. * Do the annoying scrambling so the shader doesn't have to. 
*/ - const float **param = (const float **) prog_data.base.base.param; - static float zero = 0.0f; + uint32_t *param = prog_data.base.base.param; for (int i = 0; i < 8; i++) - param[i] = &zero; + param[i] = BRW_PARAM_BUILTIN_ZERO; if (key->tes_primitive_mode == GL_QUADS) { for (int i = 0; i < 4; i++) - param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i]; + param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - param[3] = &ctx->TessCtrlProgram.patch_default_inner_level[0]; - param[2] = &ctx->TessCtrlProgram.patch_default_inner_level[1]; + param[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; + param[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y; } else if (key->tes_primitive_mode == GL_TRIANGLES) { for (int i = 0; i < 3; i++) - param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i]; + param[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i; - param[4] = &ctx->TessCtrlProgram.patch_default_inner_level[0]; + param[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X; } else { assert(key->tes_primitive_mode == GL_ISOLINES); - param[7] = &ctx->TessCtrlProgram.patch_default_outer_level[1]; - param[6] = &ctx->TessCtrlProgram.patch_default_outer_level[0]; + param[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y; + param[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; } } diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c index 6564fefc06e..7ee925b0891 100644 --- a/src/mesa/drivers/dri/i965/brw_tes.c +++ b/src/mesa/drivers/dri/i965/brw_tes.c @@ -90,10 +90,8 @@ brw_codegen_tes_prog(struct brw_context *brw, */ int param_count = nir->num_uniforms / 4; - prog_data.base.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.base.image_param = rzalloc_array(NULL, 
struct brw_image_param, tep->program.info.num_images); diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 756e13db31f..9dd812e1981 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -191,10 +191,8 @@ brw_codegen_vs_prog(struct brw_context *brw, */ param_count += key->nr_userclip_plane_consts * 4; - stage_prog_data->param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - stage_prog_data->pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + stage_prog_data->param = rzalloc_array(NULL, uint32_t, param_count); + stage_prog_data->pull_param = rzalloc_array(NULL, uint32_t, param_count); stage_prog_data->image_param = rzalloc_array(NULL, struct brw_image_param, stage_prog_data->nr_image_params); @@ -244,7 +242,6 @@ brw_codegen_vs_prog(struct brw_context *brw, char *error_str; program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data, vp->program.nir, - brw_select_clip_planes(&brw->ctx), !_mesa_is_gles3(&brw->ctx), st_index, &program_size, &error_str); if (program == NULL) { diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 3ab8e3930ae..ddafa526198 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -153,10 +153,8 @@ brw_codegen_wm_prog(struct brw_context *brw, prog_data.base.nr_image_params = fp->program.info.num_images; /* The backend also sometimes adds params for texture size. 
*/ param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; - prog_data.base.param = - rzalloc_array(NULL, const gl_constant_value *, param_count); - prog_data.base.pull_param = - rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.param = rzalloc_array(NULL, uint32_t, param_count); + prog_data.base.pull_param = rzalloc_array(NULL, uint32_t, param_count); prog_data.base.image_param = rzalloc_array(NULL, struct brw_image_param, prog_data.base.nr_image_params); diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c index b2e357fd9d9..93a12c706ff 100644 --- a/src/mesa/drivers/dri/i965/gen6_constant_state.c +++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c @@ -24,21 +24,84 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" +#include "brw_program.h" #include "intel_batchbuffer.h" #include "intel_buffer_objects.h" #include "program/prog_parameter.h" +static uint32_t +f_as_u32(float f) +{ + return *(uint32_t *)&f; +} + +static uint32_t +brw_param_value(struct brw_context *brw, + const struct gl_program *prog, + const struct brw_stage_prog_data *prog_data, + uint32_t param) +{ + struct gl_context *ctx = &brw->ctx; + + switch (BRW_PARAM_DOMAIN(param)) { + case BRW_PARAM_DOMAIN_BUILTIN: + if (param == BRW_PARAM_BUILTIN_ZERO) { + return 0; + } else if (BRW_PARAM_BUILTIN_IS_CLIP_PLANE(param)) { + gl_clip_plane *clip_planes = brw_select_clip_planes(ctx); + unsigned idx = BRW_PARAM_BUILTIN_CLIP_PLANE_IDX(param); + unsigned comp = BRW_PARAM_BUILTIN_CLIP_PLANE_COMP(param); + return ((uint32_t *)clip_planes[idx])[comp]; + } else if (param >= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X && + param <= BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_W) { + unsigned i = param - BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X; + return f_as_u32(ctx->TessCtrlProgram.patch_default_outer_level[i]); + } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X) { + return 
f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[0]); + } else if (param == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) { + return f_as_u32(ctx->TessCtrlProgram.patch_default_inner_level[1]); + } else { + unreachable("Invalid param builtin"); + } + + case BRW_PARAM_DOMAIN_PARAMETER: { + unsigned idx = BRW_PARAM_PARAMETER_IDX(param); + unsigned comp = BRW_PARAM_PARAMETER_COMP(param); + assert(idx < prog->Parameters->NumParameters); + return prog->Parameters->ParameterValues[idx][comp].u; + } + + case BRW_PARAM_DOMAIN_UNIFORM: { + unsigned idx = BRW_PARAM_UNIFORM_IDX(param); + assert(idx < prog->sh.data->NumUniformDataSlots); + return prog->sh.data->UniformDataSlots[idx].u; + } + + case BRW_PARAM_DOMAIN_IMAGE: { + unsigned idx = BRW_PARAM_IMAGE_IDX(param); + unsigned offset = BRW_PARAM_IMAGE_OFFSET(param); + assert(idx < prog_data->nr_image_params); + assert(offset < sizeof(struct brw_image_param)); + return ((uint32_t *)&prog_data->image_param[idx])[offset]; + } + + default: + unreachable("Invalid param domain"); + } +} + + void brw_populate_constant_data(struct brw_context *brw, const struct gl_program *prog, const struct brw_stage_prog_data *prog_data, void *void_dst, - const union gl_constant_value **param, + const uint32_t *param, unsigned nr_params) { - gl_constant_value *dst = void_dst; + uint32_t *dst = void_dst; for (unsigned i = 0; i < nr_params; i++) - dst[i] = *param[i]; + dst[i] = brw_param_value(brw, prog, prog_data, param[i]); } @@ -234,7 +297,7 @@ brw_upload_cs_push_constants(struct brw_context *brw, } - gl_constant_value *param = (gl_constant_value*) + uint32_t *param = brw_state_batch(brw, ALIGN(cs_prog_data->push.total.size, 64), 64, &stage_state->push_const_offset); assert(param); @@ -242,18 +305,18 @@ brw_upload_cs_push_constants(struct brw_context *brw, STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float)); if (cs_prog_data->push.cross_thread.size > 0) { - gl_constant_value *param_copy = param; + uint32_t *param_copy = param; 
assert(cs_prog_data->thread_local_id_index < 0 || cs_prog_data->thread_local_id_index >= cs_prog_data->push.cross_thread.dwords); for (unsigned i = 0; i < cs_prog_data->push.cross_thread.dwords; i++) { - param_copy[i] = *prog_data->param[i]; + param_copy[i] = brw_param_value(brw, prog, prog_data, + prog_data->param[i]); } } - gl_constant_value thread_id; if (cs_prog_data->push.per_thread.size > 0) { for (unsigned t = 0; t < cs_prog_data->threads; t++) { unsigned dst = @@ -261,11 +324,11 @@ brw_upload_cs_push_constants(struct brw_context *brw, cs_prog_data->push.cross_thread.regs); unsigned src = cs_prog_data->push.cross_thread.dwords; for ( ; src < prog_data->nr_params; src++, dst++) { - if (src != cs_prog_data->thread_local_id_index) - param[dst] = *prog_data->param[src]; - else { - thread_id.u = t * cs_prog_data->simd_size; - param[dst] = thread_id; + if (src != cs_prog_data->thread_local_id_index) { + param[dst] = brw_param_value(brw, prog, prog_data, + prog_data->param[src]); + } else { + param[dst] = t * cs_prog_data->simd_size; } } } |