summary | refs | log | tree | commit | diff | stats
path: root/src/gallium/drivers/r600
diff options
context:
space:
mode:
author Christian König <[email protected]> 2011-02-02 00:24:34 +0100
committer Christian König <[email protected]> 2011-02-02 01:33:03 +0100
commit 8ca3b140eb53fd8063337a5a2a54a35987d597bc (patch)
tree 1005fa8685f97feec39ba981dd025290d3068205 /src/gallium/drivers/r600
parent 8503cffc4c7b8df8d4260161dc8aa06b0f81f19e (diff)
r600g: use burst exports in shaders
Join multiple exports into just one instruction instead of exporting each register separately.
Diffstat (limited to 'src/gallium/drivers/r600')
-rw-r--r-- src/gallium/drivers/r600/eg_asm.c 3
-rw-r--r-- src/gallium/drivers/r600/r600_asm.c 36
-rw-r--r-- src/gallium/drivers/r600/r600_asm.h 1
-rw-r--r-- src/gallium/drivers/r600/r600_shader.c 3
4 files changed, 40 insertions, 3 deletions
diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index 67d742b3760..80c5de39750 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -62,7 +62,8 @@ int eg_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 13bf7644e7e..e910d1cc73f 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -246,6 +246,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
{
int r;
+ if (bc->cf_last && (bc->cf_last->inst == output->inst ||
+ (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+ output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) &&
+ output->type == bc->cf_last->output.type &&
+ output->elem_size == bc->cf_last->output.elem_size &&
+ output->swizzle_x == bc->cf_last->output.swizzle_x &&
+ output->swizzle_y == bc->cf_last->output.swizzle_y &&
+ output->swizzle_z == bc->cf_last->output.swizzle_z &&
+ output->swizzle_w == bc->cf_last->output.swizzle_w &&
+ (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
+
+ if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
+ (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
+
+ bc->cf_last->output.end_of_program |= output->end_of_program;
+ bc->cf_last->output.inst = output->inst;
+ bc->cf_last->output.gpr = output->gpr;
+ bc->cf_last->output.array_base = output->array_base;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+
+ } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
+ output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
+
+ bc->cf_last->output.end_of_program |= output->end_of_program;
+ bc->cf_last->output.inst = output->inst;
+ bc->cf_last->output.burst_count += output->burst_count;
+ return 0;
+ }
+ }
+
r = r600_bc_add_cf(bc);
if (r)
return r;
@@ -1443,7 +1474,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
- bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+ bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -1725,9 +1757,9 @@ void r600_bc_dump(struct r600_bc *bc)
fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
- fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
fprintf(stderr, "INST:%d ", cf->output.inst);
+ fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
break;
case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 278b4466cb0..b91d9b5b41b 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -116,6 +116,7 @@ struct r600_bc_output {
unsigned swizzle_y;
unsigned swizzle_z;
unsigned swizzle_w;
+ unsigned burst_count;
unsigned barrier;
};
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 41849875074..643c47d4bf6 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -632,6 +632,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = i - pos0;
@@ -695,6 +696,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[i].swizzle_y = 1;
output[i].swizzle_z = 2;
output[i].swizzle_w = 3;
+ output[i].burst_count = 1;
output[i].barrier = 1;
output[i].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[i].array_base = 0;
@@ -711,6 +713,7 @@ int r600_shader_from_tgsi(const struct tgsi_token *tokens, struct r600_shader *s
output[0].swizzle_y = 7;
output[0].swizzle_z = 7;
output[0].swizzle_w = 7;
+ output[0].burst_count = 1;
output[0].barrier = 1;
output[0].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[0].array_base = 0;