summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/broadcom/common/v3d_limits.h5
-rw-r--r--src/broadcom/compiler/nir_to_vir.c175
-rw-r--r--src/broadcom/compiler/v3d_compiler.h33
-rw-r--r--src/broadcom/compiler/v3d_nir_lower_io.c357
-rw-r--r--src/broadcom/compiler/vir.c119
-rw-r--r--src/gallium/drivers/v3d/v3d_context.h30
-rw-r--r--src/gallium/drivers/v3d/v3d_program.c169
7 files changed, 778 insertions, 110 deletions
diff --git a/src/broadcom/common/v3d_limits.h b/src/broadcom/common/v3d_limits.h
index d65edddab74..e02582035f1 100644
--- a/src/broadcom/common/v3d_limits.h
+++ b/src/broadcom/common/v3d_limits.h
@@ -30,8 +30,11 @@
#define V3D_CHANNELS 16
#define V3D_MAX_FS_INPUTS 64
+#define V3D_MAX_GS_INPUTS 64
#define V3D_MAX_VS_INPUTS 64
-#define V3D_MAX_ANY_STAGE_INPUTS MAX2(V3D_MAX_VS_INPUTS, V3D_MAX_FS_INPUTS)
+#define V3D_MAX_ANY_STAGE_INPUTS MAX3(V3D_MAX_VS_INPUTS, \
+ V3D_MAX_GS_INPUTS, \
+ V3D_MAX_FS_INPUTS)
/* Not specifically a hardware limit, just coordination between compiler and
* driver.
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 6b566c68e07..d7bef12fef9 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1367,11 +1367,20 @@ emit_frag_end(struct v3d_compile *c)
vir_emit_tlb_color_write(c, rt);
}
+static inline void
+vir_VPM_WRITE_indirect(struct v3d_compile *c,
+ struct qreg val,
+ struct qreg vpm_index)
+{
+ assert(c->devinfo->ver >= 40);
+ vir_STVPMV(c, vpm_index, val);
+}
+
static void
vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t vpm_index)
{
if (c->devinfo->ver >= 40) {
- vir_STVPMV(c, vir_uniform_ui(c, vpm_index), val);
+ vir_VPM_WRITE_indirect(c, val, vir_uniform_ui(c, vpm_index));
} else {
/* XXX: v3d33_vir_vpm_write_setup(c); */
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
@@ -1387,6 +1396,15 @@ emit_vert_end(struct v3d_compile *c)
vir_VPMWT(c);
}
+static void
+emit_geom_end(struct v3d_compile *c)
+{
+ /* GFXH-1684: VPM writes need to be complete by the end of the shader.
+ */
+ if (c->devinfo->ver >= 40 && c->devinfo->ver <= 42)
+ vir_VPMWT(c);
+}
+
void
v3d_optimize_nir(struct nir_shader *s)
{
@@ -1474,7 +1492,7 @@ ntq_emit_vpm_read(struct v3d_compile *c,
}
static void
-ntq_setup_vpm_inputs(struct v3d_compile *c)
+ntq_setup_vs_inputs(struct v3d_compile *c)
{
/* Figure out how many components of each vertex attribute the shader
* uses. Each variable should have been split to individual
@@ -1565,24 +1583,69 @@ program_reads_point_coord(struct v3d_compile *c)
}
static void
-ntq_setup_fs_inputs(struct v3d_compile *c)
+get_sorted_input_variables(struct v3d_compile *c,
+ unsigned *num_entries,
+ nir_variable ***vars)
{
- unsigned num_entries = 0;
+ *num_entries = 0;
nir_foreach_variable(var, &c->s->inputs)
- num_entries++;
+ (*num_entries)++;
- nir_variable *vars[num_entries];
+ *vars = ralloc_array(c, nir_variable *, *num_entries);
unsigned i = 0;
nir_foreach_variable(var, &c->s->inputs)
- vars[i++] = var;
+ (*vars)[i++] = var;
/* Sort the variables so that we emit the input setup in
* driver_location order. This is required for VPM reads, whose data
* is fetched into the VPM in driver_location (TGSI register index)
* order.
*/
- qsort(&vars, num_entries, sizeof(*vars), driver_location_compare);
+ qsort(*vars, *num_entries, sizeof(**vars), driver_location_compare);
+}
+
+static void
+ntq_setup_gs_inputs(struct v3d_compile *c)
+{
+ nir_variable **vars;
+ unsigned num_entries;
+ get_sorted_input_variables(c, &num_entries, &vars);
+
+ for (unsigned i = 0; i < num_entries; i++) {
+ nir_variable *var = vars[i];
+
+ /* All GS inputs are arrays with as many entries as vertices
+ * in the input primitive, but here we only care about the
+ * per-vertex input type.
+ */
+ const struct glsl_type *type = glsl_without_array(var->type);
+ unsigned array_len = MAX2(glsl_get_length(type), 1);
+ unsigned loc = var->data.driver_location;
+
+ resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
+ (loc + array_len) * 4);
+
+ for (unsigned j = 0; j < array_len; j++) {
+ unsigned num_elements = glsl_get_vector_elements(type);
+ for (unsigned k = 0; k < num_elements; k++) {
+ unsigned chan = var->data.location_frac + k;
+ unsigned input_idx = c->num_inputs++;
+ struct v3d_varying_slot slot =
+ v3d_slot_from_slot_and_component(var->data.location + j, chan);
+ c->input_slots[input_idx] = slot;
+ }
+ }
+ }
+}
+
+
+static void
+ntq_setup_fs_inputs(struct v3d_compile *c)
+{
+ nir_variable **vars;
+ unsigned num_entries;
+ get_sorted_input_variables(c, &num_entries, &vars);
for (unsigned i = 0; i < num_entries; i++) {
nir_variable *var = vars[i];
@@ -1949,6 +2012,40 @@ ntq_emit_color_write(struct v3d_compile *c,
}
static void
+emit_store_output_gs(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ assert(instr->num_components == 1);
+
+ uint32_t base_offset = nir_intrinsic_base(instr);
+ struct qreg src_offset = ntq_get_src(c, instr->src[1], 0);
+ struct qreg offset =
+ vir_ADD(c, vir_uniform_ui(c, base_offset), src_offset);
+
+ vir_VPM_WRITE_indirect(c, ntq_get_src(c, instr->src[0], 0), offset);
+}
+
+static void
+ntq_emit_store_output(struct v3d_compile *c, nir_intrinsic_instr *instr)
+{
+ /* XXX perf: Use stvpmv with uniform non-constant offsets and
+ * stvpmd with non-uniform offsets and enable
+ * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
+ */
+ if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
+ ntq_emit_color_write(c, instr);
+ } else if (c->s->info.stage == MESA_SHADER_GEOMETRY) {
+ emit_store_output_gs(c, instr);
+ } else {
+ assert(c->s->info.stage == MESA_SHADER_VERTEX);
+ assert(instr->num_components == 1);
+
+ vir_VPM_WRITE(c,
+ ntq_get_src(c, instr->src[0], 0),
+ nir_intrinsic_base(instr));
+ }
+}
+
+static void
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
{
switch (instr->intrinsic) {
@@ -2090,19 +2187,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_store_output:
- /* XXX perf: Use stvpmv with uniform non-constant offsets and
- * stvpmd with non-uniform offsets and enable
- * PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR.
- */
- if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
- ntq_emit_color_write(c, instr);
- } else {
- assert(instr->num_components == 1);
-
- vir_VPM_WRITE(c,
- ntq_get_src(c, instr->src[0], 0),
- nir_intrinsic_base(instr));
- }
+ ntq_emit_store_output(c, instr);
break;
case nir_intrinsic_image_deref_size:
@@ -2214,6 +2299,34 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
ntq_store_dest(c, &instr->dest, 0, vir_EIDX(c));
break;
+ case nir_intrinsic_load_per_vertex_input: {
+ /* col: vertex index, row = varying index */
+ struct qreg col = ntq_get_src(c, instr->src[0], 0);
+ uint32_t row_idx = nir_intrinsic_base(instr) * 4 +
+ nir_intrinsic_component(instr);
+ for (int i = 0; i < instr->num_components; i++) {
+ struct qreg row = vir_uniform_ui(c, row_idx++);
+ ntq_store_dest(c, &instr->dest, i,
+ vir_LDVPMG_IN(c, row, col));
+ }
+ break;
+ }
+
+ case nir_intrinsic_emit_vertex:
+ case nir_intrinsic_end_primitive:
+ unreachable("Should have been lowered in v3d_nir_lower_io");
+ break;
+
+ case nir_intrinsic_load_primitive_id: {
+ /* gl_PrimitiveIDIn is written by the GBG in the first word of
+ * VPM output header. According to docs, we should read this
+ * using ldvpm(v,d)_in (See Table 71).
+ */
+ ntq_store_dest(c, &instr->dest, 0,
+ vir_LDVPMV_IN(c, vir_uniform_ui(c, 0)));
+ break;
+ }
+
default:
fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr);
@@ -2636,10 +2749,21 @@ nir_to_vir(struct v3d_compile *c)
c->spill_size += V3D_CHANNELS * c->s->scratch_size;
}
- if (c->s->info.stage == MESA_SHADER_FRAGMENT)
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
+ ntq_setup_vs_inputs(c);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ ntq_setup_gs_inputs(c);
+ break;
+ case MESA_SHADER_FRAGMENT:
ntq_setup_fs_inputs(c);
- else
- ntq_setup_vpm_inputs(c);
+ break;
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ unreachable("unsupported shader stage");
+ }
ntq_setup_outputs(c);
@@ -2785,6 +2909,9 @@ v3d_nir_to_vir(struct v3d_compile *c)
case MESA_SHADER_FRAGMENT:
emit_frag_end(c);
break;
+ case MESA_SHADER_GEOMETRY:
+ emit_geom_end(c);
+ break;
case MESA_SHADER_VERTEX:
emit_vert_end(c);
break;
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 29057bdf4df..9b08e4a270e 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -329,6 +329,7 @@ struct v3d_key {
bool clamp_r:1;
} tex[V3D_MAX_TEXTURE_SAMPLERS];
uint8_t ucp_enables;
+ bool is_last_geometry_stage;
};
struct v3d_fs_key {
@@ -371,6 +372,16 @@ struct v3d_fs_key {
struct pipe_rt_blend_state blend;
};
+struct v3d_gs_key {
+ struct v3d_key base;
+
+ struct v3d_varying_slot used_outputs[V3D_MAX_FS_INPUTS];
+ uint8_t num_used_outputs;
+
+ bool is_coord;
+ bool per_vertex_point_size;
+};
+
struct v3d_vs_key {
struct v3d_key base;
@@ -552,6 +563,7 @@ struct v3d_compile {
int local_invocation_index_bits;
uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
+ uint8_t gs_input_sizes[V3D_MAX_GS_INPUTS];
uint32_t vpm_output_size;
/* Size in bytes of registers that have been spilled. This is how much
@@ -586,6 +598,7 @@ struct v3d_compile {
struct pipe_shader_state *shader_state;
struct v3d_key *key;
struct v3d_fs_key *fs_key;
+ struct v3d_gs_key *gs_key;
struct v3d_vs_key *vs_key;
/* Live ranges of temps. */
@@ -687,6 +700,26 @@ struct v3d_vs_prog_data {
uint8_t vcm_cache_size;
};
+struct v3d_gs_prog_data {
+ struct v3d_prog_data base;
+
+ /* Whether the program reads gl_PrimitiveIDIn */
+ bool uses_pid;
+
+ /* Number of components read from each input varying. */
+ uint8_t input_sizes[V3D_MAX_GS_INPUTS / 4];
+
+ /* Number of inputs */
+ uint8_t num_inputs;
+ struct v3d_varying_slot input_slots[V3D_MAX_GS_INPUTS];
+
+ /* Total number of components written, for the shader state record. */
+ uint32_t vpm_output_size;
+
+ /* Output primitive type */
+ uint8_t out_prim_type;
+};
+
struct v3d_fs_prog_data {
struct v3d_prog_data base;
diff --git a/src/broadcom/compiler/v3d_nir_lower_io.c b/src/broadcom/compiler/v3d_nir_lower_io.c
index 3145c560a14..3c9279a2fee 100644
--- a/src/broadcom/compiler/v3d_nir_lower_io.c
+++ b/src/broadcom/compiler/v3d_nir_lower_io.c
@@ -45,22 +45,46 @@ struct v3d_nir_lower_io_state {
int psiz_vpm_offset;
int varyings_vpm_offset;
+ /* Geometry shader state */
+ struct {
+ /* VPM offset for the current vertex data output */
+ nir_variable *output_offset_var;
+ /* VPM offset for the current vertex header */
+ nir_variable *header_offset_var;
+ /* VPM header for the current vertex */
+ nir_variable *header_var;
+
+ /* Size of the complete VPM output header */
+ uint32_t output_header_size;
+ /* Size of the output data for a single vertex */
+ uint32_t output_vertex_data_size;
+ } gs;
+
BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
nir_ssa_def *pos[4];
};
static void
-v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *chan)
+v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
+ struct v3d_nir_lower_io_state *state);
+
+static void
+v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
+ nir_ssa_def *chan)
{
nir_intrinsic_instr *intr =
- nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+ nir_intrinsic_instr_create(b->shader,
+ nir_intrinsic_store_output);
nir_ssa_dest_init(&intr->instr, &intr->dest,
1, intr->dest.ssa.bit_size, NULL);
intr->num_components = 1;
intr->src[0] = nir_src_for_ssa(chan);
- intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+ if (offset)
+ intr->src[1] = nir_src_for_ssa(offset);
+ else
+ intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
nir_intrinsic_set_base(intr, base);
nir_intrinsic_set_write_mask(intr, 0x1);
@@ -91,8 +115,23 @@ v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
{
int component = var->data.location_frac + chan;
- for (int i = 0; i < c->vs_key->num_used_outputs; i++) {
- struct v3d_varying_slot slot = c->vs_key->used_outputs[i];
+ uint32_t num_used_outputs = 0;
+ struct v3d_varying_slot *used_outputs = NULL;
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
+ num_used_outputs = c->vs_key->num_used_outputs;
+ used_outputs = c->vs_key->used_outputs;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ num_used_outputs = c->gs_key->num_used_outputs;
+ used_outputs = c->gs_key->used_outputs;
+ break;
+ default:
+ unreachable("Unsupported shader stage");
+ }
+
+ for (int i = 0; i < num_used_outputs; i++) {
+ struct v3d_varying_slot slot = used_outputs[i];
if (v3d_slot_get_slot(slot) == var->data.location &&
v3d_slot_get_component(slot) == component) {
@@ -105,6 +144,9 @@ v3d_varying_slot_vpm_offset(struct v3d_compile *c, nir_variable *var, int chan)
/* Lowers a store_output(gallium driver location) to a series of store_outputs
* with a driver_location equal to the offset in the VPM.
+ *
+ * For geometry shaders we need to emit multiple vertices so the VPM offsets
+ * need to be computed in the shader code based on the current vertex index.
*/
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
@@ -113,6 +155,13 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
{
b->cursor = nir_before_instr(&intr->instr);
+ /* If this is a geometry shader we need to emit our outputs
+ * to the current vertex offset in the VPM.
+ */
+ nir_ssa_def *offset_reg =
+ c->s->info.stage == MESA_SHADER_GEOMETRY ?
+ nir_load_var(b, state->gs.output_offset_var) : NULL;
+
int start_comp = nir_intrinsic_component(intr);
nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
intr->num_components);
@@ -141,7 +190,7 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
/* Just psiz to the position in the FF header right now. */
if (var->data.location == VARYING_SLOT_PSIZ &&
state->psiz_vpm_offset != -1) {
- v3d_nir_store_output(b, state->psiz_vpm_offset, src);
+ v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
}
/* Scalarize outputs if it hasn't happened already, since we want to
@@ -161,12 +210,73 @@ v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
BITSET_SET(state->varyings_stored, vpm_offset);
v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
- nir_channel(b, src, i));
+ offset_reg, nir_channel(b, src, i));
}
nir_instr_remove(&intr->instr);
}
+static inline void
+reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
+{
+ const uint8_t NEW_PRIMITIVE_OFFSET = 0;
+ const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
+
+ uint32_t vertex_data_size = state->gs.output_vertex_data_size;
+ assert((vertex_data_size & 0xffffff00) == 0);
+
+ uint32_t header;
+ header = 1 << NEW_PRIMITIVE_OFFSET;
+ header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
+ nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
+}
+
+static void
+v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
+ nir_intrinsic_instr *instr,
+ struct v3d_nir_lower_io_state *state)
+{
+ b->cursor = nir_before_instr(&instr->instr);
+
+ nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
+ nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
+ nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
+
+ /* Emit fixed function outputs */
+ v3d_nir_emit_ff_vpm_outputs(c, b, state);
+
+ /* Emit vertex header */
+ v3d_nir_store_output(b, 0, header_offset, header);
+
+ /* Update VPM offset for next vertex output data and header */
+ output_offset =
+ nir_iadd(b, output_offset,
+ nir_imm_int(b, state->gs.output_vertex_data_size));
+
+ header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));
+
+ /* Reset the New Primitive bit */
+ header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));
+
+ nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
+ nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
+ nir_store_var(b, state->gs.header_var, header, 0x1);
+
+ nir_instr_remove(&instr->instr);
+}
+
+static void
+v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
+ nir_intrinsic_instr *instr,
+ struct v3d_nir_lower_io_state *state)
+{
+ assert(state->gs.header_var);
+ b->cursor = nir_before_instr(&instr->instr);
+ reset_gs_header(b, state);
+
+ nir_instr_remove(&instr->instr);
+}
+
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
struct nir_instr *instr,
@@ -182,8 +292,18 @@ v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
break;
case nir_intrinsic_store_output:
- if (c->s->info.stage == MESA_SHADER_VERTEX)
+ if (c->s->info.stage == MESA_SHADER_VERTEX ||
+ c->s->info.stage == MESA_SHADER_GEOMETRY) {
v3d_nir_lower_vpm_output(c, b, intr, state);
+ }
+ break;
+
+ case nir_intrinsic_emit_vertex:
+ v3d_nir_lower_emit_vertex(c, b, intr, state);
+ break;
+
+ case nir_intrinsic_end_primitive:
+ v3d_nir_lower_end_primitive(c, b, intr, state);
break;
default:
@@ -226,12 +346,64 @@ v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
}
static void
-v3d_nir_setup_vpm_layout(struct v3d_compile *c,
- struct v3d_nir_lower_io_state *state)
+v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
+ struct v3d_nir_lower_io_state *state)
{
uint32_t vpm_offset = 0;
- if (c->vs_key->is_coord) {
+ state->pos_vpm_offset = -1;
+ state->vp_vpm_offset = -1;
+ state->zs_vpm_offset = -1;
+ state->rcp_wc_vpm_offset = -1;
+ state->psiz_vpm_offset = -1;
+
+ bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
+ if (needs_ff_outputs) {
+ if (c->vs_key->is_coord) {
+ state->pos_vpm_offset = vpm_offset;
+ vpm_offset += 4;
+ }
+
+ state->vp_vpm_offset = vpm_offset;
+ vpm_offset += 2;
+
+ if (!c->vs_key->is_coord) {
+ state->zs_vpm_offset = vpm_offset++;
+ state->rcp_wc_vpm_offset = vpm_offset++;
+ }
+
+ if (c->vs_key->per_vertex_point_size)
+ state->psiz_vpm_offset = vpm_offset++;
+ }
+
+ state->varyings_vpm_offset = vpm_offset;
+
+ c->vpm_output_size = vpm_offset + c->vs_key->num_used_outputs;
+}
+
+static void
+v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
+ struct v3d_nir_lower_io_state *state)
+{
+ /* 1 header slot for number of output vertices */
+ uint32_t vpm_offset = 1;
+
+ /* 1 header slot per output vertex */
+ const uint32_t num_vertices = c->s->info.gs.vertices_out;
+ vpm_offset += num_vertices;
+
+ state->gs.output_header_size = vpm_offset;
+
+ /* Vertex data: here we only compute offsets into a generic vertex data
+ * element. When it is time to actually write a particular vertex to
+ * the VPM, we will add the offset for that vertex into the VPM output
+ * to these offsets.
+ *
+ * If geometry shaders are present, they are always the last shader
+ * stage before rasterization, so we always emit fixed function outputs.
+ */
+ vpm_offset = 0;
+ if (c->gs_key->is_coord) {
state->pos_vpm_offset = vpm_offset;
vpm_offset += 4;
} else {
@@ -241,7 +413,7 @@ v3d_nir_setup_vpm_layout(struct v3d_compile *c,
state->vp_vpm_offset = vpm_offset;
vpm_offset += 2;
- if (!c->vs_key->is_coord) {
+ if (!c->gs_key->is_coord) {
state->zs_vpm_offset = vpm_offset++;
state->rcp_wc_vpm_offset = vpm_offset++;
} else {
@@ -249,20 +421,34 @@ v3d_nir_setup_vpm_layout(struct v3d_compile *c,
state->rcp_wc_vpm_offset = -1;
}
- if (c->vs_key->per_vertex_point_size)
+ /* Mesa enables OES_geometry_shader_point_size automatically with
+ * OES_geometry_shader so we always need to handle point size
+ * writes if present.
+ */
+ if (c->gs_key->per_vertex_point_size)
state->psiz_vpm_offset = vpm_offset++;
- else
- state->psiz_vpm_offset = -1;
state->varyings_vpm_offset = vpm_offset;
- c->vpm_output_size = vpm_offset + c->vs_key->num_used_outputs;
+ state->gs.output_vertex_data_size =
+ state->varyings_vpm_offset + c->gs_key->num_used_outputs;
+
+ c->vpm_output_size =
+ state->gs.output_header_size +
+ state->gs.output_vertex_data_size * num_vertices;
}
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state)
{
+ /* If this is a geometry shader we need to emit our fixed function
+ * outputs to the current vertex offset in the VPM.
+ */
+ nir_ssa_def *offset_reg =
+ c->s->info.stage == MESA_SHADER_GEOMETRY ?
+ nir_load_var(b, state->gs.output_offset_var) : NULL;
+
for (int i = 0; i < 4; i++) {
if (!state->pos[i])
state->pos[i] = nir_ssa_undef(b, 1, 32);
@@ -273,23 +459,25 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
if (state->pos_vpm_offset != -1) {
for (int i = 0; i < 4; i++) {
v3d_nir_store_output(b, state->pos_vpm_offset + i,
- state->pos[i]);
+ offset_reg, state->pos[i]);
}
}
- for (int i = 0; i < 2; i++) {
- nir_ssa_def *pos;
- nir_ssa_def *scale;
- pos = state->pos[i];
- if (i == 0)
- scale = nir_load_viewport_x_scale(b);
- else
- scale = nir_load_viewport_y_scale(b);
- pos = nir_fmul(b, pos, scale);
- pos = nir_fmul(b, pos, rcp_wc);
- pos = nir_f2i32(b, nir_fround_even(b, pos));
- v3d_nir_store_output(b, state->vp_vpm_offset + i,
- pos);
+ if (state->vp_vpm_offset != -1) {
+ for (int i = 0; i < 2; i++) {
+ nir_ssa_def *pos;
+ nir_ssa_def *scale;
+ pos = state->pos[i];
+ if (i == 0)
+ scale = nir_load_viewport_x_scale(b);
+ else
+ scale = nir_load_viewport_y_scale(b);
+ pos = nir_fmul(b, pos, scale);
+ pos = nir_fmul(b, pos, rcp_wc);
+ pos = nir_f2i32(b, nir_fround_even(b, pos));
+ v3d_nir_store_output(b, state->vp_vpm_offset + i,
+ offset_reg, pos);
+ }
}
if (state->zs_vpm_offset != -1) {
@@ -297,38 +485,118 @@ v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
z = nir_fmul(b, z, rcp_wc);
z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
- v3d_nir_store_output(b, state->zs_vpm_offset, z);
+ v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
}
- if (state->rcp_wc_vpm_offset != -1)
- v3d_nir_store_output(b, state->rcp_wc_vpm_offset, rcp_wc);
+ if (state->rcp_wc_vpm_offset != -1) {
+ v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
+ offset_reg, rcp_wc);
+ }
- /* Store 0 to varyings requested by the FS but not stored in the VS.
- * This should be undefined behavior, but glsl-routing seems to rely
- * on it.
+ /* Store 0 to varyings requested by the FS but not stored by the
+ * previous stage. This should be undefined behavior, but
+ * glsl-routing seems to rely on it.
*/
- for (int i = 0; i < c->vs_key->num_used_outputs; i++) {
+ uint32_t num_used_outputs;
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
+ num_used_outputs = c->vs_key->num_used_outputs;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ num_used_outputs = c->gs_key->num_used_outputs;
+ break;
+ default:
+ unreachable("Unsupported shader stage");
+ }
+
+ for (int i = 0; i < num_used_outputs; i++) {
if (!BITSET_TEST(state->varyings_stored, i)) {
v3d_nir_store_output(b, state->varyings_vpm_offset + i,
- nir_imm_int(b, 0));
+ offset_reg, nir_imm_int(b, 0));
}
}
}
+static void
+emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
+ nir_function_impl *impl,
+ struct v3d_nir_lower_io_state *state)
+{
+ nir_block *first = nir_start_block(impl);
+ b->cursor = nir_before_block(first);
+
+ const struct glsl_type *uint_type = glsl_uint_type();
+
+ assert(!state->gs.output_offset_var);
+ state->gs.output_offset_var =
+ nir_local_variable_create(impl, uint_type, "output_offset");
+ nir_store_var(b, state->gs.output_offset_var,
+ nir_imm_int(b, state->gs.output_header_size), 0x1);
+
+ assert(!state->gs.header_offset_var);
+ state->gs.header_offset_var =
+ nir_local_variable_create(impl, uint_type, "header_offset");
+ nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
+
+ assert(!state->gs.header_var);
+ state->gs.header_var =
+ nir_local_variable_create(impl, uint_type, "header");
+ reset_gs_header(b, state);
+}
+
+static void
+emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
+ struct v3d_nir_lower_io_state *state)
+{
+ const uint8_t VERTEX_COUNT_OFFSET = 16;
+
+ /* Our GS header has 1 generic header slot (at VPM offset 0) and then
+ * one slot per output vertex after it. This means we don't need to
+ * have a variable just to keep track of the number of vertices we
+ * emitted and instead we can just compute it here from the header
+ * offset variable by removing the one generic header slot that always
+ * goes at the beginning of our header.
+ */
+ nir_ssa_def *header_offset =
+ nir_load_var(b, state->gs.header_offset_var);
+ nir_ssa_def *vertex_count =
+ nir_isub(b, header_offset, nir_imm_int(b, 1));
+ nir_ssa_def *header =
+ nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
+ nir_ishl(b, vertex_count,
+ nir_imm_int(b, VERTEX_COUNT_OFFSET)));
+
+ v3d_nir_store_output(b, 0, NULL, header);
+}
+
void
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
struct v3d_nir_lower_io_state state = { 0 };
/* Set up the layout of the VPM outputs. */
- if (s->info.stage == MESA_SHADER_VERTEX)
- v3d_nir_setup_vpm_layout(c, &state);
+ switch (s->info.stage) {
+ case MESA_SHADER_VERTEX:
+ v3d_nir_setup_vpm_layout_vs(c, &state);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ v3d_nir_setup_vpm_layout_gs(c, &state);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ unreachable("Unsupported shader stage");
+ }
nir_foreach_function(function, s) {
if (function->impl) {
nir_builder b;
nir_builder_init(&b, function->impl);
+ if (c->s->info.stage == MESA_SHADER_GEOMETRY)
+ emit_gs_prolog(c, &b, function->impl, &state);
+
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block)
v3d_nir_lower_io_instr(c, &b, instr,
@@ -337,8 +605,11 @@ v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
nir_block *last = nir_impl_last_block(function->impl);
b.cursor = nir_after_block(last);
- if (s->info.stage == MESA_SHADER_VERTEX)
+ if (s->info.stage == MESA_SHADER_VERTEX) {
v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
+ } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
+ emit_gs_vpm_output_header_prolog(c, &b, &state);
+ }
nir_metadata_preserve(function->impl,
nir_metadata_block_index |
@@ -346,6 +617,8 @@ v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
}
}
- if (s->info.stage == MESA_SHADER_VERTEX)
+ if (s->info.stage == MESA_SHADER_VERTEX ||
+ s->info.stage == MESA_SHADER_GEOMETRY) {
v3d_nir_lower_io_update_output_var_base(c, &state);
+ }
}
diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c
index 340cda903e9..dc966bc80ca 100644
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -23,6 +23,7 @@
#include "broadcom/common/v3d_device_info.h"
#include "v3d_compiler.h"
+#include "util/u_prim.h"
int
vir_get_nsrc(struct qinst *inst)
@@ -661,6 +662,28 @@ v3d_vs_set_prog_data(struct v3d_compile *c,
}
static void
+v3d_gs_set_prog_data(struct v3d_compile *c,
+ struct v3d_gs_prog_data *prog_data)
+{
+ prog_data->num_inputs = c->num_inputs;
+ memcpy(prog_data->input_slots, c->input_slots,
+ c->num_inputs * sizeof(*c->input_slots));
+
+ /* gl_PrimitiveIDIn is written by the GBG into the first word of the
+ * VPM output header automatically and the shader will overwrite
+ * it after reading it if necessary, so it doesn't add to the VPM
+ * size requirements.
+ */
+ prog_data->uses_pid = (c->s->info.system_values_read &
+ (1ull << SYSTEM_VALUE_PRIMITIVE_ID));
+
+ /* Output segment size is in sectors (8 rows of 32 bits per channel) */
+ prog_data->vpm_output_size = align(c->vpm_output_size, 8) / 8;
+
+ prog_data->out_prim_type = c->s->info.gs.output_primitive;
+}
+
+static void
v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
struct v3d_fs_prog_data *prog_data)
{
@@ -714,13 +737,21 @@ v3d_set_prog_data(struct v3d_compile *c,
v3d_set_prog_data_uniforms(c, prog_data);
- if (c->s->info.stage == MESA_SHADER_COMPUTE) {
- v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
- } else if (c->s->info.stage == MESA_SHADER_VERTEX) {
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
v3d_vs_set_prog_data(c, (struct v3d_vs_prog_data *)prog_data);
- } else {
- assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ v3d_gs_set_prog_data(c, (struct v3d_gs_prog_data *)prog_data);
+ break;
+ case MESA_SHADER_FRAGMENT:
v3d_fs_set_prog_data(c, (struct v3d_fs_prog_data *)prog_data);
+ break;
+ case MESA_SHADER_COMPUTE:
+ v3d_cs_set_prog_data(c, (struct v3d_compute_prog_data *)prog_data);
+ break;
+ default:
+ unreachable("unsupported shader stage");
}
}
@@ -772,6 +803,37 @@ v3d_nir_lower_vs_early(struct v3d_compile *c)
}
static void
+v3d_nir_lower_gs_early(struct v3d_compile *c)
+{
+ /* Split our I/O vars and dead code eliminate the unused
+ * components.
+ */
+ NIR_PASS_V(c->s, nir_lower_io_to_scalar_early,
+ nir_var_shader_in | nir_var_shader_out);
+ uint64_t used_outputs[4] = {0};
+ for (int i = 0; i < c->gs_key->num_used_outputs; i++) {
+ int slot = v3d_slot_get_slot(c->gs_key->used_outputs[i]);
+ int comp = v3d_slot_get_component(c->gs_key->used_outputs[i]);
+ used_outputs[comp] |= 1ull << slot;
+ }
+ NIR_PASS_V(c->s, nir_remove_unused_io_vars,
+ &c->s->outputs, used_outputs, NULL); /* demotes to globals */
+ NIR_PASS_V(c->s, nir_lower_global_vars_to_local);
+ v3d_optimize_nir(c->s);
+ NIR_PASS_V(c->s, nir_remove_dead_variables, nir_var_shader_in);
+
+ /* This must go before nir_lower_io */
+ if (c->gs_key->per_vertex_point_size)
+ NIR_PASS_V(c->s, nir_lower_point_size, 1.0f, 0.0f);
+
+ NIR_PASS_V(c->s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
+ type_size_vec4,
+ (nir_lower_io_options)0);
+ /* clean up nir_lower_io's deref_var remains */
+ NIR_PASS_V(c->s, nir_opt_dce);
+}
+
+static void
v3d_fixup_fs_output_types(struct v3d_compile *c)
{
nir_foreach_variable(var, &c->s->outputs) {
@@ -819,6 +881,18 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
}
static void
+v3d_nir_lower_gs_late(struct v3d_compile *c)
+{
+ if (c->key->ucp_enables) {
+ NIR_PASS_V(c->s, nir_lower_clip_gs, c->key->ucp_enables,
+ false, NULL);
+ }
+
+ /* Note: GS output scalarizing must happen after nir_lower_clip_gs. */
+ NIR_PASS_V(c->s, nir_lower_io_to_scalar, nir_var_shader_out);
+}
+
+static void
v3d_nir_lower_vs_late(struct v3d_compile *c)
{
if (c->vs_key->clamp_color)
@@ -907,6 +981,10 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
c->vs_key = (struct v3d_vs_key *)key;
prog_data = rzalloc_size(NULL, sizeof(struct v3d_vs_prog_data));
break;
+ case MESA_SHADER_GEOMETRY:
+ c->gs_key = (struct v3d_gs_key *)key;
+ prog_data = rzalloc_size(NULL, sizeof(struct v3d_gs_prog_data));
+ break;
case MESA_SHADER_FRAGMENT:
c->fs_key = (struct v3d_fs_key *)key;
prog_data = rzalloc_size(NULL, sizeof(struct v3d_fs_prog_data));
@@ -919,20 +997,35 @@ uint64_t *v3d_compile(const struct v3d_compiler *compiler,
unreachable("unsupported shader stage");
}
- if (c->s->info.stage == MESA_SHADER_VERTEX) {
+
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
v3d_nir_lower_vs_early(c);
- } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
- assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ v3d_nir_lower_gs_early(c);
+ break;
+ case MESA_SHADER_FRAGMENT:
v3d_nir_lower_fs_early(c);
+ break;
+ default:
+ break;
}
v3d_lower_nir(c);
- if (c->s->info.stage == MESA_SHADER_VERTEX) {
+ switch (c->s->info.stage) {
+ case MESA_SHADER_VERTEX:
v3d_nir_lower_vs_late(c);
- } else if (c->s->info.stage != MESA_SHADER_COMPUTE) {
- assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ v3d_nir_lower_gs_late(c);
+ break;
+ case MESA_SHADER_FRAGMENT:
v3d_nir_lower_fs_late(c);
+ break;
+ default:
+ break;
}
NIR_PASS_V(c->s, v3d_nir_lower_io, c);
@@ -1134,7 +1227,9 @@ const char *
vir_get_stage_name(struct v3d_compile *c)
{
if (c->vs_key && c->vs_key->is_coord)
- return "MESA_SHADER_COORD";
+ return "MESA_SHADER_VERTEX_BIN";
+ else if (c->gs_key && c->gs_key->is_coord)
+ return "MESA_SHADER_GEOMETRY_BIN";
else
return gl_shader_stage_name(c->s->info.stage);
}
diff --git a/src/gallium/drivers/v3d/v3d_context.h b/src/gallium/drivers/v3d/v3d_context.h
index ecedbaf9efb..bf85b42eb9d 100644
--- a/src/gallium/drivers/v3d/v3d_context.h
+++ b/src/gallium/drivers/v3d/v3d_context.h
@@ -59,7 +59,8 @@ void v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo);
#define VC5_DIRTY_ZSA (1ull << 2)
#define VC5_DIRTY_COMPTEX (1ull << 3)
#define VC5_DIRTY_VERTTEX (1ull << 4)
-#define VC5_DIRTY_FRAGTEX (1ull << 5)
+#define VC5_DIRTY_GEOMTEX (1ull << 5)
+#define VC5_DIRTY_FRAGTEX (1ull << 6)
#define VC5_DIRTY_SHADER_IMAGE (1ull << 9)
#define VC5_DIRTY_BLEND_COLOR (1ull << 10)
@@ -77,18 +78,22 @@ void v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo);
#define VC5_DIRTY_CLIP (1ull << 22)
#define VC5_DIRTY_UNCOMPILED_CS (1ull << 23)
#define VC5_DIRTY_UNCOMPILED_VS (1ull << 24)
-#define VC5_DIRTY_UNCOMPILED_FS (1ull << 25)
+#define VC5_DIRTY_UNCOMPILED_GS (1ull << 25)
+#define VC5_DIRTY_UNCOMPILED_FS (1ull << 26)
#define VC5_DIRTY_COMPILED_CS (1ull << 29)
#define VC5_DIRTY_COMPILED_VS (1ull << 30)
-#define VC5_DIRTY_COMPILED_FS (1ull << 31)
-
-#define VC5_DIRTY_FS_INPUTS (1ull << 35)
-#define VC5_DIRTY_STREAMOUT (1ull << 36)
-#define VC5_DIRTY_OQ (1ull << 37)
-#define VC5_DIRTY_CENTROID_FLAGS (1ull << 38)
-#define VC5_DIRTY_NOPERSPECTIVE_FLAGS (1ull << 39)
-#define VC5_DIRTY_SSBO (1ull << 40)
+#define VC5_DIRTY_COMPILED_GS_BIN (1ull << 31)
+#define VC5_DIRTY_COMPILED_GS (1ull << 32)
+#define VC5_DIRTY_COMPILED_FS (1ull << 33)
+
+#define VC5_DIRTY_FS_INPUTS (1ull << 38)
+#define VC5_DIRTY_GS_INPUTS (1ull << 39)
+#define VC5_DIRTY_STREAMOUT (1ull << 40)
+#define VC5_DIRTY_OQ (1ull << 41)
+#define VC5_DIRTY_CENTROID_FLAGS (1ull << 42)
+#define VC5_DIRTY_NOPERSPECTIVE_FLAGS (1ull << 43)
+#define VC5_DIRTY_SSBO (1ull << 44)
#define VC5_MAX_FS_INPUTS 64
@@ -206,6 +211,7 @@ struct v3d_compiled_shader {
union {
struct v3d_prog_data *base;
struct v3d_vs_prog_data *vs;
+ struct v3d_gs_prog_data *gs;
struct v3d_fs_prog_data *fs;
struct v3d_compute_prog_data *compute;
} prog_data;
@@ -219,8 +225,8 @@ struct v3d_compiled_shader {
};
struct v3d_program_stateobj {
- struct v3d_uncompiled_shader *bind_vs, *bind_fs, *bind_compute;
- struct v3d_compiled_shader *cs, *vs, *fs, *compute;
+ struct v3d_uncompiled_shader *bind_vs, *bind_gs, *bind_fs, *bind_compute;
+ struct v3d_compiled_shader *cs, *vs, *gs_bin, *gs, *fs, *compute;
struct hash_table *cache[MESA_SHADER_STAGES];
diff --git a/src/gallium/drivers/v3d/v3d_program.c b/src/gallium/drivers/v3d/v3d_program.c
index 0f7762f119d..7bbdbe409e2 100644
--- a/src/gallium/drivers/v3d/v3d_program.c
+++ b/src/gallium/drivers/v3d/v3d_program.c
@@ -205,8 +205,12 @@ v3d_shader_precompile(struct v3d_context *v3d,
v3d_setup_shared_precompile_key(so, &key.base);
v3d_get_compiled_shader(v3d, &key.base, sizeof(key));
} else {
+ /* FIXME: add geometry shaders */
+
struct v3d_vs_key key = {
.base.shader_state = so,
+ /* Emit fixed function outputs */
+ .base.is_last_geometry_stage = true,
};
v3d_setup_shared_precompile_key(so, &key.base);
@@ -271,8 +275,10 @@ v3d_uncompiled_shader_create(struct pipe_context *pctx,
}
nir_variable_mode lower_mode = nir_var_all & ~nir_var_uniform;
- if (s->info.stage == MESA_SHADER_VERTEX)
+ if (s->info.stage == MESA_SHADER_VERTEX ||
+ s->info.stage == MESA_SHADER_GEOMETRY) {
lower_mode &= ~(nir_var_shader_in | nir_var_shader_out);
+ }
NIR_PASS_V(s, nir_lower_io, lower_mode,
type_size,
(nir_lower_io_options)0);
@@ -609,55 +615,153 @@ v3d_update_compiled_fs(struct v3d_context *v3d, uint8_t prim_mode)
}
static void
-v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
+v3d_update_compiled_gs(struct v3d_context *v3d, uint8_t prim_mode)
{
- struct v3d_vs_key local_key;
- struct v3d_vs_key *key = &local_key;
+ struct v3d_gs_key local_key;
+ struct v3d_gs_key *key = &local_key;
- if (!(v3d->dirty & (VC5_DIRTY_PRIM_MODE |
+ if (!(v3d->dirty & (VC5_DIRTY_GEOMTEX |
VC5_DIRTY_RASTERIZER |
- VC5_DIRTY_VERTTEX |
- VC5_DIRTY_VTXSTATE |
- VC5_DIRTY_UNCOMPILED_VS |
+ VC5_DIRTY_UNCOMPILED_GS |
+ VC5_DIRTY_PRIM_MODE |
VC5_DIRTY_FS_INPUTS))) {
return;
}
+ if (!v3d->prog.bind_gs) {
+ v3d->prog.gs = NULL;
+ v3d->prog.gs_bin = NULL;
+ return;
+ }
+
memset(key, 0, sizeof(*key));
- v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]);
- key->base.shader_state = v3d->prog.bind_vs;
+ v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_GEOMETRY]);
+ key->base.shader_state = v3d->prog.bind_gs;
key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
+ key->base.is_last_geometry_stage = true;
key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs;
STATIC_ASSERT(sizeof(key->used_outputs) ==
sizeof(v3d->prog.fs->prog_data.fs->input_slots));
memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots,
sizeof(key->used_outputs));
- key->clamp_color = v3d->rasterizer->base.clamp_vertex_color;
key->per_vertex_point_size =
(prim_mode == PIPE_PRIM_POINTS &&
v3d->rasterizer->base.point_size_per_vertex);
+ struct v3d_compiled_shader *old_gs = v3d->prog.gs;
- struct v3d_compiled_shader *vs =
+ struct v3d_compiled_shader *gs =
 v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
- if (vs != v3d->prog.vs) {
- v3d->prog.vs = vs;
- v3d->dirty |= VC5_DIRTY_COMPILED_VS;
+ if (gs != v3d->prog.gs) {
+ v3d->prog.gs = gs;
+ v3d->dirty |= VC5_DIRTY_COMPILED_GS;
 }
 key->is_coord = true;
- /* Coord shaders only output varyings used by transform feedback. */
+
+ /* The last bin-mode shader in the geometry pipeline only outputs
+ * varyings used by transform feedback.
+ */
 struct v3d_uncompiled_shader *shader_state = key->base.shader_state;
 memcpy(key->used_outputs, shader_state->tf_outputs,
 sizeof(*key->used_outputs) * shader_state->num_tf_outputs);
 if (shader_state->num_tf_outputs < key->num_used_outputs) {
+ uint32_t size = sizeof(*key->used_outputs) *
+ (key->num_used_outputs -
+ shader_state->num_tf_outputs);
 memset(&key->used_outputs[shader_state->num_tf_outputs],
- 0,
- sizeof(*key->used_outputs) * (key->num_used_outputs -
- shader_state->num_tf_outputs));
+ 0, size);
 }
 key->num_used_outputs = shader_state->num_tf_outputs;
+ struct v3d_compiled_shader *gs_bin =
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
+ if (gs_bin != v3d->prog.gs_bin) {
+ v3d->prog.gs_bin = gs_bin;
+ v3d->dirty |= VC5_DIRTY_COMPILED_GS_BIN;
+ }
+
+ if (old_gs && memcmp(v3d->prog.gs->prog_data.gs->input_slots,
+ old_gs->prog_data.gs->input_slots,
+ sizeof(v3d->prog.gs->prog_data.gs->input_slots))) {
+ v3d->dirty |= VC5_DIRTY_GS_INPUTS;
+ }
+}
+
+static void
+v3d_update_compiled_vs(struct v3d_context *v3d, uint8_t prim_mode)
+{
+ struct v3d_vs_key local_key;
+ struct v3d_vs_key *key = &local_key;
+
+ if (!(v3d->dirty & (VC5_DIRTY_VERTTEX |
+ VC5_DIRTY_VTXSTATE |
+ VC5_DIRTY_UNCOMPILED_VS |
+ (v3d->prog.bind_gs ? 0 : VC5_DIRTY_RASTERIZER) |
+ (v3d->prog.bind_gs ? 0 : VC5_DIRTY_PRIM_MODE) |
+ (v3d->prog.bind_gs ? VC5_DIRTY_GS_INPUTS :
+ VC5_DIRTY_FS_INPUTS)))) {
+ return;
+ }
+
+ memset(key, 0, sizeof(*key));
+ v3d_setup_shared_key(v3d, &key->base, &v3d->tex[PIPE_SHADER_VERTEX]);
+ key->base.shader_state = v3d->prog.bind_vs;
+ key->base.ucp_enables = v3d->rasterizer->base.clip_plane_enable;
+ key->base.is_last_geometry_stage = !v3d->prog.bind_gs;
+
+ if (!v3d->prog.bind_gs) {
+ key->num_used_outputs = v3d->prog.fs->prog_data.fs->num_inputs;
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(v3d->prog.fs->prog_data.fs->input_slots));
+ memcpy(key->used_outputs, v3d->prog.fs->prog_data.fs->input_slots,
+ sizeof(key->used_outputs));
+ } else {
+ key->num_used_outputs = v3d->prog.gs->prog_data.gs->num_inputs;
+ STATIC_ASSERT(sizeof(key->used_outputs) ==
+ sizeof(v3d->prog.gs->prog_data.gs->input_slots));
+ memcpy(key->used_outputs, v3d->prog.gs->prog_data.gs->input_slots,
+ sizeof(key->used_outputs));
+ }
+
+ key->clamp_color = v3d->rasterizer->base.clamp_vertex_color;
+
+ key->per_vertex_point_size =
+ (prim_mode == PIPE_PRIM_POINTS &&
+ v3d->rasterizer->base.point_size_per_vertex);
+
+ struct v3d_compiled_shader *vs =
+ v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
+ if (vs != v3d->prog.vs) {
+ v3d->prog.vs = vs;
+ v3d->dirty |= VC5_DIRTY_COMPILED_VS;
+ }
+
+ key->is_coord = true;
+
+ /* Coord shaders only output varyings used by transform feedback,
+ * unless they are linked to other shaders in the geometry side
+ * of the pipeline, since in that case any of the output varyings
+ * could be required in later geometry stages to compute
+ * gl_Position or TF outputs.
+ */
+ if (!v3d->prog.bind_gs) {
+ struct v3d_uncompiled_shader *shader_state =
+ key->base.shader_state;
+ memcpy(key->used_outputs, shader_state->tf_outputs,
+ sizeof(*key->used_outputs) *
+ shader_state->num_tf_outputs);
+ if (shader_state->num_tf_outputs < key->num_used_outputs) {
+ uint32_t tail_bytes =
+ sizeof(*key->used_outputs) *
+ (key->num_used_outputs -
+ shader_state->num_tf_outputs);
+ memset(&key->used_outputs[shader_state->num_tf_outputs],
+ 0, tail_bytes);
+ }
+ key->num_used_outputs = shader_state->num_tf_outputs;
+ }
+
struct v3d_compiled_shader *cs =
v3d_get_compiled_shader(v3d, &key->base, sizeof(*key));
if (cs != v3d->prog.cs) {
@@ -670,6 +774,7 @@ void
v3d_update_compiled_shaders(struct v3d_context *v3d, uint8_t prim_mode)
{
v3d_update_compiled_fs(v3d, prim_mode);
+ v3d_update_compiled_gs(v3d, prim_mode);
v3d_update_compiled_vs(v3d, prim_mode);
}
@@ -703,6 +808,12 @@ fs_cache_hash(const void *key)
}
static uint32_t
+gs_cache_hash(const void *key)
+{
+ return _mesa_hash_data(key, sizeof(struct v3d_gs_key));
+}
+
+static uint32_t
vs_cache_hash(const void *key)
{
return _mesa_hash_data(key, sizeof(struct v3d_vs_key));
@@ -721,6 +832,12 @@ fs_cache_compare(const void *key1, const void *key2)
}
static bool
+gs_cache_compare(const void *key1, const void *key2)
+{
+ return memcmp(key1, key2, sizeof(struct v3d_gs_key)) == 0;
+}
+
+static bool
vs_cache_compare(const void *key1, const void *key2)
{
return memcmp(key1, key2, sizeof(struct v3d_vs_key)) == 0;
@@ -772,6 +889,14 @@ v3d_fp_state_bind(struct pipe_context *pctx, void *hwcso)
}
static void
+v3d_gp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+ struct v3d_context *v3d = v3d_context(pctx);
+ v3d->prog.bind_gs = hwcso;
+ v3d->dirty |= VC5_DIRTY_UNCOMPILED_GS;
+}
+
+static void
v3d_vp_state_bind(struct pipe_context *pctx, void *hwcso)
{
struct v3d_context *v3d = v3d_context(pctx);
@@ -804,10 +929,14 @@ v3d_program_init(struct pipe_context *pctx)
pctx->create_vs_state = v3d_shader_state_create;
pctx->delete_vs_state = v3d_shader_state_delete;
+ pctx->create_gs_state = v3d_shader_state_create;
+ pctx->delete_gs_state = v3d_shader_state_delete;
+
pctx->create_fs_state = v3d_shader_state_create;
pctx->delete_fs_state = v3d_shader_state_delete;
pctx->bind_fs_state = v3d_fp_state_bind;
+ pctx->bind_gs_state = v3d_gp_state_bind;
pctx->bind_vs_state = v3d_vp_state_bind;
if (v3d->screen->has_csd) {
@@ -818,6 +947,8 @@ v3d_program_init(struct pipe_context *pctx)
v3d->prog.cache[MESA_SHADER_VERTEX] =
_mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare);
+ v3d->prog.cache[MESA_SHADER_GEOMETRY] =
+ _mesa_hash_table_create(pctx, gs_cache_hash, gs_cache_compare);
v3d->prog.cache[MESA_SHADER_FRAGMENT] =
_mesa_hash_table_create(pctx, fs_cache_hash, fs_cache_compare);
v3d->prog.cache[MESA_SHADER_COMPUTE] =