diff options
author | Eric Anholt <[email protected]> | 2018-09-18 11:40:54 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2018-10-30 10:46:52 -0700 |
commit | cc78676030d61df17ae2020d63757f69caec3cb5 (patch) | |
tree | 7e1486e18def00c97bbba65925b11888a97c15b3 /src/broadcom | |
parent | 8265dfaa87ef3b46288afdccae86221203c9b44e (diff) |
v3d: Split out NIR input setup between FS and VPM.
They don't share much code, and I'm about to rewrite the remaining shared
code for the VPM case.
Diffstat (limited to 'src/broadcom')
-rw-r--r-- | src/broadcom/compiler/nir_to_vir.c | 127 |
1 files changed, 80 insertions, 47 deletions
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 714d76f58ba..9bcca9dfe71 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1274,7 +1274,7 @@ ntq_emit_vpm_read(struct v3d_compile *c, } static void -ntq_setup_inputs(struct v3d_compile *c) +ntq_setup_vpm_inputs(struct v3d_compile *c) { unsigned num_entries = 0; unsigned num_components = 0; @@ -1297,24 +1297,22 @@ ntq_setup_inputs(struct v3d_compile *c) qsort(&vars, num_entries, sizeof(*vars), driver_location_compare); uint32_t vpm_components_queued = 0; - if (c->s->info.stage == MESA_SHADER_VERTEX) { - bool uses_iid = c->s->info.system_values_read & - (1ull << SYSTEM_VALUE_INSTANCE_ID); - bool uses_vid = c->s->info.system_values_read & - (1ull << SYSTEM_VALUE_VERTEX_ID); - - num_components += uses_iid; - num_components += uses_vid; - - if (uses_iid) { - c->iid = ntq_emit_vpm_read(c, &vpm_components_queued, - &num_components, ~0); - } + bool uses_iid = c->s->info.system_values_read & + (1ull << SYSTEM_VALUE_INSTANCE_ID); + bool uses_vid = c->s->info.system_values_read & + (1ull << SYSTEM_VALUE_VERTEX_ID); - if (uses_vid) { - c->vid = ntq_emit_vpm_read(c, &vpm_components_queued, - &num_components, ~0); - } + num_components += uses_iid; + num_components += uses_vid; + + if (uses_iid) { + c->iid = ntq_emit_vpm_read(c, &vpm_components_queued, + &num_components, ~0); + } + + if (uses_vid) { + c->vid = ntq_emit_vpm_read(c, &vpm_components_queued, + &num_components, ~0); } for (unsigned i = 0; i < num_entries; i++) { @@ -1327,40 +1325,71 @@ ntq_setup_inputs(struct v3d_compile *c) resize_qreg_array(c, &c->inputs, &c->inputs_array_size, (loc + 1) * 4); - if (c->s->info.stage == MESA_SHADER_FRAGMENT) { - if (var->data.location == VARYING_SLOT_POS) { - emit_fragcoord_input(c, loc); - } else if (var->data.location == VARYING_SLOT_PNTC || - (var->data.location >= VARYING_SLOT_VAR0 && - (c->fs_key->point_sprite_mask & - (1 << (var->data.location - - VARYING_SLOT_VAR0))))) { - c->inputs[loc * 4 + 0] = c->point_x; - c->inputs[loc * 4 + 1] = c->point_y; - } else { - emit_fragment_input(c, loc, var); - } - } else { - int var_components = glsl_get_components(var->type); + int var_components = glsl_get_components(var->type); - for (int i = 0; i < var_components; i++) { - c->inputs[loc * 4 + i] = - ntq_emit_vpm_read(c, - &vpm_components_queued, - &num_components, - loc * 4 + i); + for (int i = 0; i < var_components; i++) { + c->inputs[loc * 4 + i] = + ntq_emit_vpm_read(c, + &vpm_components_queued, + &num_components, + loc * 4 + i); - } - c->vattr_sizes[loc] = var_components; } + c->vattr_sizes[loc] = var_components; } - if (c->s->info.stage == MESA_SHADER_VERTEX) { - if (c->devinfo->ver >= 40) { - assert(vpm_components_queued == num_components); + if (c->devinfo->ver >= 40) { + assert(vpm_components_queued == num_components); + } else { + assert(vpm_components_queued == 0); + assert(num_components == 0); + } +} + +static void +ntq_setup_fs_inputs(struct v3d_compile *c) +{ + unsigned num_entries = 0; + unsigned num_components = 0; + nir_foreach_variable(var, &c->s->inputs) { + num_entries++; + num_components += glsl_get_components(var->type); + } + + nir_variable *vars[num_entries]; + + unsigned i = 0; + nir_foreach_variable(var, &c->s->inputs) + vars[i++] = var; + + /* Sort the variables so that we emit the input setup in + * driver_location order. This is required for VPM reads, whose data + * is fetched into the VPM in driver_location (TGSI register index) + * order. + */ + qsort(&vars, num_entries, sizeof(*vars), driver_location_compare); + + for (unsigned i = 0; i < num_entries; i++) { + nir_variable *var = vars[i]; + unsigned array_len = MAX2(glsl_get_length(var->type), 1); + unsigned loc = var->data.driver_location; + + assert(array_len == 1); + (void)array_len; + resize_qreg_array(c, &c->inputs, &c->inputs_array_size, + (loc + 1) * 4); + + if (var->data.location == VARYING_SLOT_POS) { + emit_fragcoord_input(c, loc); + } else if (var->data.location == VARYING_SLOT_PNTC || + (var->data.location >= VARYING_SLOT_VAR0 && + (c->fs_key->point_sprite_mask & + (1 << (var->data.location - + VARYING_SLOT_VAR0))))) { + c->inputs[loc * 4 + 0] = c->point_x; + c->inputs[loc * 4 + 1] = c->point_y; } else { - assert(vpm_components_queued == 0); - assert(num_components == 0); + emit_fragment_input(c, loc, var); } } } @@ -1903,7 +1932,11 @@ nir_to_vir(struct v3d_compile *c) } } - ntq_setup_inputs(c); + if (c->s->info.stage == MESA_SHADER_FRAGMENT) + ntq_setup_fs_inputs(c); + else + ntq_setup_vpm_inputs(c); + ntq_setup_outputs(c); ntq_setup_uniforms(c); ntq_setup_registers(c, &c->s->registers); |