author    Dave Airlie <[email protected]>    2014-01-24 13:39:36 +1000
committer Dave Airlie <[email protected]>    2014-02-05 10:49:40 +1000
commit    e12147e9f608b8117e653152317c6c64ea004b71
tree      429ae3524f2c6ab27cc314f8f3070f565cec03a7
parent    cda63db780512f7ca81f5e7973120d335e576ad6
r600g: add support for indirect geom ring writes
We need to be able to write to the ring using a base register for the case where we emit vertices in a loop. In theory the SB compiler could collapse these indirect writes to direct writes if the register value is constant and known, but that is outside my pay grade.

Signed-off-by: Dave Airlie <[email protected]>
Reviewed-by: Alex Deucher <[email protected]>
-rw-r--r--  src/gallium/drivers/r600/r600_shader.c  65
1 file changed, 58 insertions, 7 deletions
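To make the addressing below concrete, here is a minimal standalone sketch (not driver code) of the two ring-write modes the patch switches between: the direct path folds the compile-time vertex index into array_base, while the indirect path keeps array_base per-output and accumulates the running vertex offset in a GPR (gs_export_gpr_treg), bumped after each emitted vertex. The helper names are illustrative, and the assumption that the hardware scales the index GPR by the 4-dword export element size (which is how the ">> 4" per-vertex increment lines up with the ">> 2" array_base) is mine, not taken from the driver.

/*
 * Sketch of direct vs. indirect ring addressing, assuming array_base is
 * counted in dwords and the index GPR is scaled by the 4-dword element size.
 */
#include <stdio.h>

/* Direct write: the vertex index is a compile-time constant, so the whole
 * offset folds into array_base. */
static unsigned ring_addr_direct(unsigned ring_offset_bytes,
                                 unsigned vertex_stride_bytes,
                                 unsigned vertex_index)
{
	return (ring_offset_bytes + vertex_stride_bytes * vertex_index) >> 2;
}

/* Indirect write: vertices are emitted in a loop, so only the per-output
 * offset goes into array_base; the running vertex offset lives in a GPR
 * that the shader increments by (stride >> 4) after every vertex. */
static unsigned ring_addr_indirect(unsigned ring_offset_bytes,
                                   unsigned index_gpr_value)
{
	return (ring_offset_bytes >> 2) + index_gpr_value * 4;
}

int main(void)
{
	unsigned stride = 32;	/* bytes of output data per emitted vertex */
	unsigned gpr = 0;	/* cleared by a MOV at shader start, as in the patch */

	for (unsigned v = 0; v < 3; v++) {
		printf("vertex %u: direct=%u indirect=%u (dwords)\n", v,
		       ring_addr_direct(0, stride, v),
		       ring_addr_indirect(0, gpr));
		gpr += stride >> 4;	/* the ADD_INT emitted per EMIT_VERTEX */
	}
	return 0;
}

Both paths land on the same dword address when the vertex count is known; the indirect form is simply the one that still works when the index only exists at run time.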
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index fd11f61792a..cc2000d5ed5 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -288,6 +288,7 @@ struct r600_shader_ctx {
int gs_out_ring_offset;
int gs_next_vertex;
struct r600_shader *gs_for_vs;
+ int gs_export_gpr_treg;
};
struct r600_shader_tgsi_instruction {
@@ -297,7 +298,7 @@ struct r600_shader_tgsi_instruction {
int (*process)(struct r600_shader_ctx *ctx);
};
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx);
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind);
static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
@@ -1303,7 +1304,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
return r600_bytecode_build(ctx.bc);
}
-static int emit_gs_ring_writes(struct r600_shader_ctx *ctx)
+static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, bool ind)
{
struct r600_bytecode_output output;
int i, k, ring_offset;
@@ -1328,17 +1329,48 @@ static int emit_gs_ring_writes(struct r600_shader_ctx *ctx)
/* next_ring_offset after parsing input decls contains total size of
* single vertex data, gs_next_vertex - current vertex index */
- ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
+ if (!ind)
+ ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;
+ /* get a temp and add the ring offset to the next vertex base in the shader */
memset(&output, 0, sizeof(struct r600_bytecode_output));
output.gpr = ctx->shader->output[i].gpr;
output.elem_size = 3;
output.comp_mask = 0xF;
output.burst_count = 1;
+
+ if (ind)
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
+ else
+ output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.op = CF_OP_MEM_RING;
- output.array_base = ring_offset >> 2; /* in dwords */
+
+
+ if (ind) {
+ output.array_base = ring_offset >> 2; /* in dwords */
+ output.array_size = 0xff;
+ output.index_gpr = ctx->gs_export_gpr_treg;
+ } else
+ output.array_base = ring_offset >> 2; /* in dwords */
r600_bytecode_add_output(ctx->bc, &output);
}
+
+ if (ind) {
+ struct r600_bytecode_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP2_ADD_INT;
+ alu.src[0].sel = ctx->gs_export_gpr_treg;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = ctx->gs_out_ring_offset >> 4;
+ alu.dst.sel = ctx->gs_export_gpr_treg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ }
++ctx->gs_next_vertex;
return 0;
}
@@ -1473,7 +1505,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1;
- ctx.temp_reg = ctx.bc->ar_reg + 1;
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ ctx.gs_export_gpr_treg = ctx.bc->ar_reg + 1;
+ ctx.temp_reg = ctx.bc->ar_reg + 2;
+ } else
+ ctx.temp_reg = ctx.bc->ar_reg + 1;
if (indirect_gprs) {
shader->max_arrays = 0;
@@ -1667,6 +1703,21 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
}
+ if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
+ struct r600_bytecode_alu alu;
+ int r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0;
+ alu.dst.sel = ctx.gs_export_gpr_treg;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx.bc, &alu);
+ if (r)
+ return r;
+ }
if (shader->two_side && ctx.colors_used) {
if ((r = process_twoside_color_inputs(&ctx)))
return r;
@@ -1770,7 +1821,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
if (ring_outputs) {
if (key.vs_as_es)
- emit_gs_ring_writes(&ctx);
+ emit_gs_ring_writes(&ctx, FALSE);
} else {
/* export output */
for (i = 0, j = 0; i < noutput; i++, j++) {
@@ -5973,7 +6024,7 @@ static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
- emit_gs_ring_writes(ctx);
+ emit_gs_ring_writes(ctx, TRUE);
return r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
}