summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc.c 48
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc.h 12
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc_optimize.c 56
-rw-r--r-- src/gallium/drivers/nv50/nv50_pc_regalloc.c 23
-rw-r--r-- src/gallium/drivers/nv50/nv50_program.c 63
-rw-r--r-- src/gallium/drivers/nv50/nv50_program.h 16
-rw-r--r-- src/gallium/drivers/nv50/nv50_screen.c 3
-rw-r--r-- src/gallium/drivers/nv50/nv50_tgsi_to_nc.c 2
8 files changed, 171 insertions, 52 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c
index e34c0553eb4..c54f16e4c53 100644
--- a/src/gallium/drivers/nv50/nv50_pc.c
+++ b/src/gallium/drivers/nv50/nv50_pc.c
@@ -304,7 +304,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
}
static void
-nv_do_print_program(void *priv, struct nv_basic_block *b)
+nv_do_print_function(void *priv, struct nv_basic_block *b)
{
struct nv_instruction *i = b->phi;
@@ -323,11 +323,23 @@ nv_do_print_program(void *priv, struct nv_basic_block *b)
}
void
-nv_print_program(struct nv_basic_block *root)
+nv_print_function(struct nv_basic_block *root)
{
- nv_pc_pass_in_order(root, nv_do_print_program, root);
+ if (root->subroutine)
+ debug_printf("SUBROUTINE %i\n", root->subroutine);
+ else
+ debug_printf("MAIN\n");
- debug_printf("END\n\n");
+ nv_pc_pass_in_order(root, nv_do_print_function, root);
+}
+
+void
+nv_print_program(struct nv_pc *pc)
+{
+ int i;
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i])
+ nv_print_function(pc->root[i]);
}
static INLINE void
@@ -388,11 +400,18 @@ nv50_generate_code(struct nv50_translation_info *ti)
if (!pc)
return 1;
+ pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
+ if (!pc->root) {
+ FREE(pc);
+ return 1;
+ }
+ pc->num_subroutines = ti->subr_nr;
+
ret = nv50_tgsi_to_nc(pc, ti);
if (ret)
goto out;
#ifdef NV50PC_DEBUG
- nv_print_program(pc->root);
+ nv_print_program(pc);
#endif
/* optimization */
@@ -400,7 +419,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
if (ret)
goto out;
#ifdef NV50PC_DEBUG
- nv_print_program(pc->root);
+ nv_print_program(pc);
#endif
/* register allocation */
@@ -408,7 +427,7 @@ nv50_generate_code(struct nv50_translation_info *ti)
if (ret)
goto out;
#ifdef NV50PC_DEBUG
- nv_print_program(pc->root);
+ nv_print_program(pc);
#endif
/* prepare for emission */
@@ -441,16 +460,19 @@ nv50_generate_code(struct nv50_translation_info *ti)
out:
nv_pc_free_refs(pc);
- if (ret) {
+
+ if (pc->bb_list)
+ FREE(pc->bb_list);
+
+ if (ret) { /* on success, these will be referenced by nv50_program */
if (pc->emit)
- free(pc->emit);
+ FREE(pc->emit);
if (pc->immd_buf)
- free(pc->immd_buf);
+ FREE(pc->immd_buf);
if (pc->fixups)
- free(pc->fixups);
+ FREE(pc->fixups);
}
- free(pc);
-
+ FREE(pc);
return ret;
}
diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h
index 703d32d334e..d9cc775572e 100644
--- a/src/gallium/drivers/nv50/nv50_pc.h
+++ b/src/gallium/drivers/nv50/nv50_pc.h
@@ -282,7 +282,7 @@ struct nv_basic_block {
ubyte in_kind[8];
int id;
- struct nv_basic_block *last_visitor;
+ int subroutine;
uint priv;
uint pass_seq;
@@ -314,10 +314,10 @@ nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data)
bin[fixup->offset / 4] = val;
}
-struct nv_pc {
- struct nv50_translation_info *ti;
+struct nv50_translation_info;
- struct nv_basic_block *root;
+struct nv_pc {
+ struct nv_basic_block **root;
struct nv_basic_block *current_block;
struct nv_basic_block *parent_block;
@@ -332,6 +332,7 @@ struct nv_pc {
int num_instructions;
int num_refs;
int num_blocks;
+ int num_subroutines;
int max_reg[4];
@@ -463,7 +464,8 @@ void nv_print_instruction(struct nv_instruction *);
/* nv50_pc.c */
-void nv_print_program(struct nv_basic_block *b);
+void nv_print_function(struct nv_basic_block *root);
+void nv_print_program(struct nv_pc *);
boolean nv_op_commutative(uint opcode);
int nv50_indirect_opnd(struct nv_instruction *);
diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c
index 1ed50321754..4f5bdc1f9fb 100644
--- a/src/gallium/drivers/nv50/nv50_pc_optimize.c
+++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c
@@ -213,23 +213,36 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
pc->bin_size += b->bin_size *= 4;
}
-int
-nv_pc_exec_pass2(struct nv_pc *pc)
+static int
+nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass pass;
pass.pc = pc;
pc->pass_seq++;
- nv_pass_flatten(&pass, pc->root);
+
+ nv_pass_flatten(&pass, root);
+
+ nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
+
+ return 0;
+}
+
+int
+nv_pc_exec_pass2(struct nv_pc *pc)
+{
+ int i, ret;
NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks);
- pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *));
- pc->num_blocks = 0;
+ pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0]));
- nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc);
+ pc->num_blocks = 0;
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
+ return ret;
return 0;
}
@@ -1032,8 +1045,8 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
return 0;
}
-int
-nv_pc_exec_pass0(struct nv_pc *pc)
+static int
+nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pass_reld_elim *reldelim;
struct nv_pass pass;
@@ -1047,35 +1060,35 @@ nv_pc_exec_pass0(struct nv_pc *pc)
* to whether sources are supported memory loads.
*/
pc->pass_seq++;
- ret = nv_pass_lower_arith(&pass, pc->root);
+ ret = nv_pass_lower_arith(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_fold_loads(&pass, pc->root);
+ ret = nv_pass_fold_loads(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_fold_stores(&pass, pc->root);
+ ret = nv_pass_fold_stores(&pass, root);
if (ret)
return ret;
reldelim = CALLOC_STRUCT(nv_pass_reld_elim);
reldelim->pc = pc;
pc->pass_seq++;
- ret = nv_pass_reload_elim(reldelim, pc->root);
+ ret = nv_pass_reload_elim(reldelim, root);
FREE(reldelim);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_cse(&pass, pc->root);
+ ret = nv_pass_cse(&pass, root);
if (ret)
return ret;
pc->pass_seq++;
- ret = nv_pass_lower_mods(&pass, pc->root);
+ ret = nv_pass_lower_mods(&pass, root);
if (ret)
return ret;
@@ -1083,14 +1096,25 @@ nv_pc_exec_pass0(struct nv_pc *pc)
do {
dce.removed = 0;
pc->pass_seq++;
- ret = nv_pass_dce(&dce, pc->root);
+ ret = nv_pass_dce(&dce, root);
if (ret)
return ret;
} while (dce.removed);
- ret = nv_pass_tex_mask(&pass, pc->root);
+ ret = nv_pass_tex_mask(&pass, root);
if (ret)
return ret;
return ret;
}
+
+int
+nv_pc_exec_pass0(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
index d401706b5bc..2998343db52 100644
--- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c
+++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c
@@ -874,8 +874,8 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter)
return 0;
}
-int
-nv_pc_exec_pass1(struct nv_pc *pc)
+static int
+nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
struct nv_pc_pass *ctx;
int i, ret;
@@ -890,12 +890,12 @@ nv_pc_exec_pass1(struct nv_pc *pc)
ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
pc->pass_seq++;
- ret = pass_generate_phi_movs(ctx, pc->root);
+ ret = pass_generate_phi_movs(ctx, root);
assert(!ret);
for (i = 0; i < pc->loop_nesting_bound; ++i) {
pc->pass_seq++;
- ret = pass_build_live_sets(ctx, pc->root);
+ ret = pass_build_live_sets(ctx, root);
assert(!ret && "live sets");
if (ret) {
NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
@@ -904,10 +904,10 @@ nv_pc_exec_pass1(struct nv_pc *pc)
}
pc->pass_seq++;
- nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx);
+ nv_pc_pass_in_order(root, pass_order_instructions, ctx);
pc->pass_seq++;
- ret = pass_build_intervals(ctx, pc->root);
+ ret = pass_build_intervals(ctx, root);
assert(!ret && "build intervals");
if (ret) {
NOUVEAU_ERR("failed to build live intervals\n");
@@ -944,3 +944,14 @@ out:
FREE(ctx);
return ret;
}
+
+int
+nv_pc_exec_pass1(struct nv_pc *pc)
+{
+ int i, ret;
+
+ for (i = 0; i < pc->num_subroutines + 1; ++i)
+ if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
+ return ret;
+ return 0;
+}
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index d7d3030e2f6..925028700cd 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -147,10 +147,17 @@ prog_inst(struct nv50_translation_info *ti,
int s, c, k;
unsigned mask;
+ if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
+ ti->subr[ti->subr_nr].pos = id - 1;
+ ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */
+ ++ti->subr_nr;
+ }
+
if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+ dst = &inst->Dst[0].Register;
+
for (c = 0; c < 4; ++c) {
- dst = &inst->Dst[0].Register;
- if (inst->Dst[0].Register.Indirect)
+ if (dst->Indirect)
nv50_indirect_outputs(ti, id);
if (!(dst->WriteMask & (1 << c)))
continue;
@@ -182,6 +189,44 @@ prog_inst(struct nv50_translation_info *ti,
}
}
+/* Probably should introduce something like struct tgsi_function_declaration
+ * instead of trying to guess inputs/outputs.
+ */
+static void
+prog_subroutine_inst(struct nv50_subroutine *subr,
+ const struct tgsi_full_instruction *inst)
+{
+ const struct tgsi_dst_register *dst;
+ const struct tgsi_src_register *src;
+ int s, c, k;
+ unsigned mask;
+
+ for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
+ src = &inst->Src[s].Register;
+ if (src->File != TGSI_FILE_TEMPORARY)
+ continue;
+ mask = nv50_tgsi_src_mask(inst, s);
+
+ assert(!inst->Src[s].Register.Indirect);
+
+ for (c = 0; c < 4; ++c) {
+ k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+
+ if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
+ if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
+ subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
+ }
+ }
+
+ if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
+ dst = &inst->Dst[0].Register;
+
+ for (c = 0; c < 4; ++c)
+ if (dst->WriteMask & (1 << c))
+ subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
+ }
+}
+
static void
prog_immediate(struct nv50_translation_info *ti,
const struct tgsi_full_immediate *imm)
@@ -482,7 +527,7 @@ nv50_prog_scan(struct nv50_translation_info *ti)
{
struct nv50_program *p = ti->p;
struct tgsi_parse_context parse;
- int ret;
+ int ret, i;
p->vp.edgeflag = 0x40;
p->vp.psiz = 0x40;
@@ -496,6 +541,9 @@ nv50_prog_scan(struct nv50_translation_info *ti)
tgsi_dump(p->pipe.tokens, 0);
#endif
+ ti->subr =
+ CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+
ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
@@ -519,6 +567,13 @@ nv50_prog_scan(struct nv50_translation_info *ti)
}
}
+ /* Scan to determine which registers are inputs/outputs of a subroutine. */
+ for (i = 0; i < ti->subr_nr; ++i) {
+ int pc = ti->subr[i].id;
+ while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
+ prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
+ }
+
p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1;
p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1;
@@ -572,6 +627,8 @@ out:
FREE(ti->immd32_ty);
if (ti->insns)
FREE(ti->insns);
+ if (ti->subr)
+ FREE(ti->subr);
FREE(ti);
return ret ? FALSE : TRUE;
}
diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h
index 3c3f1f7f970..918baf325f5 100644
--- a/src/gallium/drivers/nv50/nv50_program.h
+++ b/src/gallium/drivers/nv50/nv50_program.h
@@ -27,6 +27,8 @@
#include "tgsi/tgsi_scan.h"
#include "nouveau/nouveau_class.h"
+#define NV50_CAP_MAX_PROGRAM_TEMPS (128 / 4)
+
struct nv50_varying {
uint8_t id; /* tgsi index */
uint8_t hw; /* hw index, nv50 wants flat FP inputs last */
@@ -92,13 +94,13 @@ struct nv50_program {
#define NV50_INTERP_FLAT (1 << 1)
#define NV50_INTERP_CENTROID (1 << 2)
-#define NV50_PROG_MAX_SUBROUTINES 8
-
/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
struct nv50_subroutine {
- int id;
- uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */
- uint32_t retv[4][1];
+ unsigned id;
+ unsigned pos;
+ /* function inputs and outputs */
+ uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
+ uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4];
};
struct nv50_translation_info {
@@ -119,8 +121,8 @@ struct nv50_translation_info {
unsigned immd32_nr;
ubyte *immd32_ty;
ubyte edgeflag_out;
- struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES];
- int subr_nr;
+ struct nv50_subroutine *subr;
+ unsigned subr_nr;
};
int nv50_generate_code(struct nv50_translation_info *ti);
diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c
index fc75d81d549..c1efa443daf 100644
--- a/src/gallium/drivers/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nv50/nv50_screen.c
@@ -26,6 +26,7 @@
#include "nv50_context.h"
#include "nv50_screen.h"
#include "nv50_resource.h"
+#include "nv50_program.h"
#include "nouveau/nouveau_stateobj.h"
@@ -152,7 +153,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return 0;
case PIPE_CAP_MAX_VS_TEMPS:
case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */
- return 128 / 4;
+ return NV50_CAP_MAX_PROGRAM_TEMPS;
case PIPE_CAP_DEPTH_CLAMP:
return 1;
default:
diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
index 386dbda423d..dea8fa0663e 100644
--- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
+++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c
@@ -1850,7 +1850,7 @@ nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti)
struct bld_context *bld = CALLOC_STRUCT(bld_context);
int c;
- pc->root = pc->current_block = new_basic_block(pc);
+ pc->root[0] = pc->current_block = new_basic_block(pc);
bld->pc = pc;
bld->ti = ti;