diff options
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.c | 48 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc.h | 12 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_optimize.c | 56 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_pc_regalloc.c | 23 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.c | 63 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_program.h | 16 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_screen.c | 3 | ||||
-rw-r--r-- | src/gallium/drivers/nv50/nv50_tgsi_to_nc.c | 2 |
8 files changed, 171 insertions, 52 deletions
diff --git a/src/gallium/drivers/nv50/nv50_pc.c b/src/gallium/drivers/nv50/nv50_pc.c index e34c0553eb4..c54f16e4c53 100644 --- a/src/gallium/drivers/nv50/nv50_pc.c +++ b/src/gallium/drivers/nv50/nv50_pc.c @@ -304,7 +304,7 @@ nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv) } static void -nv_do_print_program(void *priv, struct nv_basic_block *b) +nv_do_print_function(void *priv, struct nv_basic_block *b) { struct nv_instruction *i = b->phi; @@ -323,11 +323,23 @@ nv_do_print_program(void *priv, struct nv_basic_block *b) } void -nv_print_program(struct nv_basic_block *root) +nv_print_function(struct nv_basic_block *root) { - nv_pc_pass_in_order(root, nv_do_print_program, root); + if (root->subroutine) + debug_printf("SUBROUTINE %i\n", root->subroutine); + else + debug_printf("MAIN\n"); - debug_printf("END\n\n"); + nv_pc_pass_in_order(root, nv_do_print_function, root); +} + +void +nv_print_program(struct nv_pc *pc) +{ + int i; + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i]) + nv_print_function(pc->root[i]); } static INLINE void @@ -388,11 +400,18 @@ nv50_generate_code(struct nv50_translation_info *ti) if (!pc) return 1; + pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0])); + if (!pc->root) { + FREE(pc); + return 1; + } + pc->num_subroutines = ti->subr_nr; + ret = nv50_tgsi_to_nc(pc, ti); if (ret) goto out; #ifdef NV50PC_DEBUG - nv_print_program(pc->root); + nv_print_program(pc); #endif /* optimization */ @@ -400,7 +419,7 @@ nv50_generate_code(struct nv50_translation_info *ti) if (ret) goto out; #ifdef NV50PC_DEBUG - nv_print_program(pc->root); + nv_print_program(pc); #endif /* register allocation */ @@ -408,7 +427,7 @@ nv50_generate_code(struct nv50_translation_info *ti) if (ret) goto out; #ifdef NV50PC_DEBUG - nv_print_program(pc->root); + nv_print_program(pc); #endif /* prepare for emission */ @@ -441,16 +460,19 @@ nv50_generate_code(struct nv50_translation_info *ti) out: nv_pc_free_refs(pc); - if (ret) { + + if (pc->bb_list) + FREE(pc->bb_list); + + if (ret) { /* on success, these will be referenced by nv50_program */ if (pc->emit) - free(pc->emit); + FREE(pc->emit); if (pc->immd_buf) - free(pc->immd_buf); + FREE(pc->immd_buf); if (pc->fixups) - free(pc->fixups); + FREE(pc->fixups); } - free(pc); - + FREE(pc); return ret; } diff --git a/src/gallium/drivers/nv50/nv50_pc.h b/src/gallium/drivers/nv50/nv50_pc.h index 703d32d334e..d9cc775572e 100644 --- a/src/gallium/drivers/nv50/nv50_pc.h +++ b/src/gallium/drivers/nv50/nv50_pc.h @@ -282,7 +282,7 @@ struct nv_basic_block { ubyte in_kind[8]; int id; - struct nv_basic_block *last_visitor; + int subroutine; uint priv; uint pass_seq; @@ -314,10 +314,10 @@ nv_fixup_apply(uint32_t *bin, struct nv_fixup *fixup, uint32_t data) bin[fixup->offset / 4] = val; } -struct nv_pc { - struct nv50_translation_info *ti; +struct nv50_translation_info; - struct nv_basic_block *root; +struct nv_pc { + struct nv_basic_block **root; struct nv_basic_block *current_block; struct nv_basic_block *parent_block; @@ -332,6 +332,7 @@ struct nv_pc { int num_instructions; int num_refs; int num_blocks; + int num_subroutines; int max_reg[4]; @@ -463,7 +464,8 @@ void nv_print_instruction(struct nv_instruction *); /* nv50_pc.c */ -void nv_print_program(struct nv_basic_block *b); +void nv_print_function(struct nv_basic_block *root); +void nv_print_program(struct nv_pc *); boolean nv_op_commutative(uint opcode); int nv50_indirect_opnd(struct nv_instruction *); diff --git a/src/gallium/drivers/nv50/nv50_pc_optimize.c b/src/gallium/drivers/nv50/nv50_pc_optimize.c index 1ed50321754..4f5bdc1f9fb 100644 --- a/src/gallium/drivers/nv50/nv50_pc_optimize.c +++ b/src/gallium/drivers/nv50/nv50_pc_optimize.c @@ -213,23 +213,36 @@ nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b) pc->bin_size += b->bin_size *= 4; } -int -nv_pc_exec_pass2(struct nv_pc *pc) +static int +nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pass pass; pass.pc = pc; pc->pass_seq++; - nv_pass_flatten(&pass, pc->root); + + nv_pass_flatten(&pass, root); + + nv_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc); + + return 0; +} + +int +nv_pc_exec_pass2(struct nv_pc *pc) +{ + int i, ret; NV50_DBGMSG("preparing %u blocks for emission\n", pc->num_blocks); - pc->bb_list = CALLOC(pc->num_blocks, sizeof(struct nv_basic_block *)); - pc->num_blocks = 0; + pc->bb_list = CALLOC(pc->num_blocks, sizeof(pc->bb_list[0])); - nv_pc_pass_in_order(pc->root, nv_pc_pass_pre_emission, pc); + pc->num_blocks = 0; + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i]))) + return ret; return 0; } @@ -1032,8 +1045,8 @@ nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b) return 0; } -int -nv_pc_exec_pass0(struct nv_pc *pc) +static int +nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pass_reld_elim *reldelim; struct nv_pass pass; @@ -1047,35 +1060,35 @@ nv_pc_exec_pass0(struct nv_pc *pc) * to whether sources are supported memory loads. */ pc->pass_seq++; - ret = nv_pass_lower_arith(&pass, pc->root); + ret = nv_pass_lower_arith(&pass, root); if (ret) return ret; pc->pass_seq++; - ret = nv_pass_fold_loads(&pass, pc->root); + ret = nv_pass_fold_loads(&pass, root); if (ret) return ret; pc->pass_seq++; - ret = nv_pass_fold_stores(&pass, pc->root); + ret = nv_pass_fold_stores(&pass, root); if (ret) return ret; reldelim = CALLOC_STRUCT(nv_pass_reld_elim); reldelim->pc = pc; pc->pass_seq++; - ret = nv_pass_reload_elim(reldelim, pc->root); + ret = nv_pass_reload_elim(reldelim, root); FREE(reldelim); if (ret) return ret; pc->pass_seq++; - ret = nv_pass_cse(&pass, pc->root); + ret = nv_pass_cse(&pass, root); if (ret) return ret; pc->pass_seq++; - ret = nv_pass_lower_mods(&pass, pc->root); + ret = nv_pass_lower_mods(&pass, root); if (ret) return ret; @@ -1083,14 +1096,25 @@ nv_pc_exec_pass0(struct nv_pc *pc) do { dce.removed = 0; pc->pass_seq++; - ret = nv_pass_dce(&dce, pc->root); + ret = nv_pass_dce(&dce, root); if (ret) return ret; } while (dce.removed); - ret = nv_pass_tex_mask(&pass, pc->root); + ret = nv_pass_tex_mask(&pass, root); if (ret) return ret; return ret; } + +int +nv_pc_exec_pass0(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i]))) + return ret; + return 0; +} diff --git a/src/gallium/drivers/nv50/nv50_pc_regalloc.c b/src/gallium/drivers/nv50/nv50_pc_regalloc.c index d401706b5bc..2998343db52 100644 --- a/src/gallium/drivers/nv50/nv50_pc_regalloc.c +++ b/src/gallium/drivers/nv50/nv50_pc_regalloc.c @@ -874,8 +874,8 @@ pass_linear_scan(struct nv_pc_pass *ctx, int iter) return 0; } -int -nv_pc_exec_pass1(struct nv_pc *pc) +static int +nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root) { struct nv_pc_pass *ctx; int i, ret; @@ -890,12 +890,12 @@ nv_pc_exec_pass1(struct nv_pc *pc) ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *)); pc->pass_seq++; - ret = pass_generate_phi_movs(ctx, pc->root); + ret = pass_generate_phi_movs(ctx, root); assert(!ret); for (i = 0; i < pc->loop_nesting_bound; ++i) { pc->pass_seq++; - ret = pass_build_live_sets(ctx, pc->root); + ret = pass_build_live_sets(ctx, root); assert(!ret && "live sets"); if (ret) { NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i); @@ -904,10 +904,10 @@ nv_pc_exec_pass1(struct nv_pc *pc) } pc->pass_seq++; - nv_pc_pass_in_order(pc->root, pass_order_instructions, ctx); + nv_pc_pass_in_order(root, pass_order_instructions, ctx); pc->pass_seq++; - ret = pass_build_intervals(ctx, pc->root); + ret = pass_build_intervals(ctx, root); assert(!ret && "build intervals"); if (ret) { NOUVEAU_ERR("failed to build live intervals\n"); @@ -944,3 +944,14 @@ out: FREE(ctx); return ret; } + +int +nv_pc_exec_pass1(struct nv_pc *pc) +{ + int i, ret; + + for (i = 0; i < pc->num_subroutines + 1; ++i) + if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i]))) + return ret; + return 0; +} diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index d7d3030e2f6..925028700cd 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -147,10 +147,17 @@ prog_inst(struct nv50_translation_info *ti, int s, c, k; unsigned mask; + if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) { + ti->subr[ti->subr_nr].pos = id - 1; + ti->subr[ti->subr_nr].id = ti->subr_nr + 1; /* id 0 is main program */ + ++ti->subr_nr; + } + if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { + dst = &inst->Dst[0].Register; + for (c = 0; c < 4; ++c) { - dst = &inst->Dst[0].Register; - if (inst->Dst[0].Register.Indirect) + if (dst->Indirect) nv50_indirect_outputs(ti, id); if (!(dst->WriteMask & (1 << c))) continue; @@ -182,6 +189,44 @@ prog_inst(struct nv50_translation_info *ti, } } +/* Probably should introduce something like struct tgsi_function_declaration + * instead of trying to guess inputs/outputs. + */ +static void +prog_subroutine_inst(struct nv50_subroutine *subr, + const struct tgsi_full_instruction *inst) +{ + const struct tgsi_dst_register *dst; + const struct tgsi_src_register *src; + int s, c, k; + unsigned mask; + + for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) { + src = &inst->Src[s].Register; + if (src->File != TGSI_FILE_TEMPORARY) + continue; + mask = nv50_tgsi_src_mask(inst, s); + + assert(!inst->Src[s].Register.Indirect); + + for (c = 0; c < 4; ++c) { + k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c); + + if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W) + if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32)))) + subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32); + } + } + + if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) { + dst = &inst->Dst[0].Register; + + for (c = 0; c < 4; ++c) + if (dst->WriteMask & (1 << c)) + subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32); + } +} + static void prog_immediate(struct nv50_translation_info *ti, const struct tgsi_full_immediate *imm) @@ -482,7 +527,7 @@ nv50_prog_scan(struct nv50_translation_info *ti) { struct nv50_program *p = ti->p; struct tgsi_parse_context parse; - int ret; + int ret, i; p->vp.edgeflag = 0x40; p->vp.psiz = 0x40; @@ -496,6 +541,9 @@ nv50_prog_scan(struct nv50_translation_info *ti) tgsi_dump(p->pipe.tokens, 0); #endif + ti->subr = + CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0])); + ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16); ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte)); @@ -519,6 +567,13 @@ nv50_prog_scan(struct nv50_translation_info *ti) } } + /* Scan to determine which registers are inputs/outputs of a subroutine. */ + for (i = 0; i < ti->subr_nr; ++i) { + int pc = ti->subr[i].id; + while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB) + prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]); + } + p->in_nr = ti->scan.file_max[TGSI_FILE_INPUT] + 1; p->out_nr = ti->scan.file_max[TGSI_FILE_OUTPUT] + 1; @@ -572,6 +627,8 @@ out: FREE(ti->immd32_ty); if (ti->insns) FREE(ti->insns); + if (ti->subr) + FREE(ti->subr); FREE(ti); return ret ? FALSE : TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 3c3f1f7f970..918baf325f5 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -27,6 +27,8 @@ #include "tgsi/tgsi_scan.h" #include "nouveau/nouveau_class.h" +#define NV50_CAP_MAX_PROGRAM_TEMPS (128 / 4) + struct nv50_varying { uint8_t id; /* tgsi index */ uint8_t hw; /* hw index, nv50 wants flat FP inputs last */ @@ -92,13 +94,13 @@ struct nv50_program { #define NV50_INTERP_FLAT (1 << 1) #define NV50_INTERP_CENTROID (1 << 2) -#define NV50_PROG_MAX_SUBROUTINES 8 - /* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */ struct nv50_subroutine { - int id; - uint32_t argv[4][1]; /* 4 bitmasks, for each of xyzw, only allow 32 TEMPs */ - uint32_t retv[4][1]; + unsigned id; + unsigned pos; + /* function inputs and outputs */ + uint32_t argv[NV50_CAP_MAX_PROGRAM_TEMPS][4]; + uint32_t retv[NV50_CAP_MAX_PROGRAM_TEMPS][4]; }; struct nv50_translation_info { @@ -119,8 +121,8 @@ struct nv50_translation_info { unsigned immd32_nr; ubyte *immd32_ty; ubyte edgeflag_out; - struct nv50_subroutine subr[NV50_PROG_MAX_SUBROUTINES]; - int subr_nr; + struct nv50_subroutine *subr; + unsigned subr_nr; }; int nv50_generate_code(struct nv50_translation_info *ti); diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index fc75d81d549..c1efa443daf 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -26,6 +26,7 @@ #include "nv50_context.h" #include "nv50_screen.h" #include "nv50_resource.h" +#include "nv50_program.h" #include "nouveau/nouveau_stateobj.h" @@ -152,7 +153,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_MAX_VS_TEMPS: case PIPE_CAP_MAX_FS_TEMPS: /* no spilling atm */ - return 128 / 4; + return NV50_CAP_MAX_PROGRAM_TEMPS; case PIPE_CAP_DEPTH_CLAMP: return 1; default: diff --git a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c index 386dbda423d..dea8fa0663e 100644 --- a/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c +++ b/src/gallium/drivers/nv50/nv50_tgsi_to_nc.c @@ -1850,7 +1850,7 @@ nv50_tgsi_to_nc(struct nv_pc *pc, struct nv50_translation_info *ti) struct bld_context *bld = CALLOC_STRUCT(bld_context); int c; - pc->root = pc->current_block = new_basic_block(pc); + pc->root[0] = pc->current_block = new_basic_block(pc); bld->pc = pc; bld->ti = ti; |