diff options
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/r300_fragprog.c | 67 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c | 141 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/compiler/radeon_code.h | 17 | ||||
-rw-r--r-- | src/mesa/drivers/dri/r300/r300_state.c | 35 |
4 files changed, 145 insertions, 115 deletions
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index fbffd3d7cdd..6c9fba49146 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -203,18 +203,21 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); - for (n = 0; n < (code->cur_node + 1); n++) { + for (n = 0; n <= (code->config & 3); n++) { + uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; + int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; + int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; + int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; + fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d, flags: %08x\n", n, - code->node[n].alu_offset, - code->node[n].tex_offset, - code->node[n].alu_end, code->node[n].tex_end, - code->node[n].flags); + "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n, + alu_offset, tex_offset, alu_end, tex_end, code_addr); - if (n > 0 || code->first_node_has_tex) { + if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { fprintf(stderr, " TEX:\n"); - for (i = code->node[n].tex_offset; - i <= code->node[n].tex_offset + code->node[n].tex_end; + for (i = tex_offset; + i <= tex_offset + tex_end; ++i) { const char *instr; @@ -252,8 +255,8 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) } } - for (i = code->node[n].alu_offset; - i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) { + for (i = alu_offset; + i <= alu_offset + alu_end; ++i) { char srcc[3][10], dstc[20]; char srca[3][10], dsta[20]; char argc[3][20]; @@ -261,8 +264,8 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) char flags[5], tmp[10]; for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst1 >> (j * 6); - int rega = code->alu.inst[i].inst3 >> (j * 6); + int regc = code->alu.inst[i].rgb_addr >> (j * 6); + int rega = code->alu.inst[i].alpha_addr >> (j * 6); sprintf(srcc[j], "%c%i", (regc & 32) ? 'c' : 't', regc & 31); @@ -273,45 +276,45 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) dstc[0] = 0; sprintf(flags, "%s%s%s", (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_X) ? "x" : "", + rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "", (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "", + rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "", (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_REG_Z) ? "z" : ""); + rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(dstc, "t%i.%s ", (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, flags); } sprintf(flags, "%s%s%s", (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", + rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "", (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", + rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "", (code->alu.inst[i]. - inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); + rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : ""); if (flags[0] != 0) { sprintf(tmp, "o%i.%s", (code->alu.inst[i]. - inst1 >> R300_ALU_DSTC_SHIFT) & 31, + rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, flags); strcat(dstc, tmp); } dsta[0] = 0; - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) { + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { sprintf(dsta, "t%i.w ", (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) { + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", (code->alu.inst[i]. - inst3 >> R300_ALU_DSTA_SHIFT) & 31); + alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); strcat(dsta, tmp); } - if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) { + if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) { strcat(dsta, "Z"); } @@ -319,12 +322,12 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n" " w: %3s %3s %3s -> %-20s (%08x)\n", i, srcc[0], srcc[1], srcc[2], dstc, - code->alu.inst[i].inst1, srca[0], srca[1], - srca[2], dsta, code->alu.inst[i].inst3); + code->alu.inst[i].rgb_addr, srca[0], srca[1], + srca[2], dsta, code->alu.inst[i].alpha_addr); for (j = 0; j < 3; ++j) { - int regc = code->alu.inst[i].inst0 >> (j * 7); - int rega = code->alu.inst[i].inst2 >> (j * 7); + int regc = code->alu.inst[i].rgb_inst >> (j * 7); + int rega = code->alu.inst[i].alpha_inst >> (j * 7); int d; char buf[20]; @@ -406,8 +409,8 @@ void r300FragmentProgramDump(struct rX00_fragment_program_code *c) fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n" " w: %8s %8s %8s op: %08x\n", argc[0], argc[1], argc[2], - code->alu.inst[i].inst0, arga[0], arga[1], - arga[2], code->alu.inst[i].inst2); + code->alu.inst[i].rgb_inst, arga[0], arga[1], + arga[2], code->alu.inst[i].alpha_inst); } } } diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 80b569ade67..305dc074ee8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -46,8 +46,18 @@ #include "r300_fragprog_swizzle.h" +struct r300_emit_state { + struct r300_fragment_program_compiler * compiler; + + unsigned current_node : 2; + unsigned node_first_tex : 8; + unsigned node_first_alu : 8; + uint32_t node_flags; +}; + #define PROG_CODE \ - struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \ + struct r300_emit_state * emit = (struct r300_emit_state*)data; \ + struct r300_fragment_program_compiler *c = emit->compiler; \ struct r300_fragment_program_code *code = &c->code->code.r300 #define error(fmt, args...) do { \ @@ -61,8 +71,8 @@ */ static void use_temporary(struct r300_fragment_program_code *code, GLuint index) { - if (index > code->max_temp_idx) - code->max_temp_idx = index; + if (index > code->pixsize) + code->pixsize = index; } @@ -121,62 +131,61 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) int ip = code->alu.length++; int j; - code->node[code->cur_node].alu_end++; - code->alu.inst[ip].inst0 = translate_rgb_opcode(c, inst->RGB.Opcode); - code->alu.inst[ip].inst2 = translate_alpha_opcode(c, inst->Alpha.Opcode); + code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); + code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5); if (!inst->RGB.Src[j].Constant) use_temporary(code, inst->RGB.Src[j].Index); - code->alu.inst[ip].inst1 |= src << (6*j); + code->alu.inst[ip].rgb_addr |= src << (6*j); src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5); if (!inst->Alpha.Src[j].Constant) use_temporary(code, inst->Alpha.Src[j].Index); - code->alu.inst[ip].inst3 |= src << (6*j); + code->alu.inst[ip].alpha_addr |= src << (6*j); GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); arg |= inst->RGB.Arg[j].Abs << 6; arg |= inst->RGB.Arg[j].Negate << 5; - code->alu.inst[ip].inst0 |= arg << (7*j); + code->alu.inst[ip].rgb_inst |= arg << (7*j); arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle); arg |= inst->Alpha.Arg[j].Abs << 6; arg |= inst->Alpha.Arg[j].Negate << 5; - code->alu.inst[ip].inst2 |= arg << (7*j); + code->alu.inst[ip].alpha_inst |= arg << (7*j); } if (inst->RGB.Saturate) - code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP; + code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP; if (inst->Alpha.Saturate) - code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP; + code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP; if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); - code->alu.inst[ip].inst1 |= + code->alu.inst[ip].rgb_addr |= (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { - code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].rgb_addr |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT); + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); - code->alu.inst[ip].inst3 |= + code->alu.inst[ip].alpha_addr |= (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT; - code->node[code->cur_node].flags |= R300_RGBA_OUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT; + emit->node_flags |= R300_RGBA_OUT; } if (inst->Alpha.DepthWriteMask) { - code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH; - code->node[code->cur_node].flags |= R300_W_OUT; + code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH; + emit->node_flags |= R300_W_OUT; c->code->writes_depth = GL_TRUE; } @@ -187,31 +196,50 @@ static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst) /** * Finish the current node without advancing to the next one. */ -static GLboolean finish_node(struct r300_fragment_program_compiler *c) +static GLboolean finish_node(struct r300_emit_state * emit) { - struct r300_fragment_program_code *code = &c->code->code.r300; - struct r300_fragment_program_node *node = &code->node[code->cur_node]; + struct r300_fragment_program_compiler * c = emit->compiler; + struct r300_fragment_program_code *code = &emit->compiler->code->code.r300; - if (node->alu_end < 0) { + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct radeon_pair_instruction inst; _mesa_bzero(&inst, sizeof(inst)); - if (!emit_alu(c, &inst)) + if (!emit_alu(emit, &inst)) return GL_FALSE; } - if (node->tex_end < 0) { - if (code->cur_node == 0) { - node->tex_end = 0; - } else { - error("Node %i has no TEX instructions", code->cur_node); + unsigned alu_offset = emit->node_first_alu; + unsigned alu_end = code->alu.length - alu_offset - 1; + unsigned tex_offset = emit->node_first_tex; + unsigned tex_end = code->tex.length - tex_offset - 1; + + if (code->tex.length == emit->node_first_tex) { + if (emit->current_node > 0) { + error("Node %i has no TEX instructions", emit->current_node); return GL_FALSE; } + + tex_end = 0; } else { - if (code->cur_node == 0) - code->first_node_has_tex = 1; + if (emit->current_node == 0) + code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX; } + /* Write the config register. + * Note: The order in which the words for each node are written + * is not correct here and needs to be fixed up once we're entirely + * done + * + * Also note that the register specification from AMD is slightly + * incorrect in its description of this register. */ + code->code_addr[emit->current_node] = + (alu_offset << R300_ALU_START_SHIFT) | + (alu_end << R300_ALU_SIZE_SHIFT) | + (tex_offset << R300_TEX_START_SHIFT) | + (tex_end << R300_TEX_SIZE_SHIFT) | + emit->node_flags; + return GL_TRUE; } @@ -224,25 +252,23 @@ static GLboolean begin_tex(void* data) { PROG_CODE; - if (code->cur_node == 0) { - if (code->node[0].alu_end < 0 && - code->node[0].tex_end < 0) - return GL_TRUE; + if (code->alu.length == emit->node_first_alu && + code->tex.length == emit->node_first_tex) { + return GL_TRUE; } - if (code->cur_node == 3) { + if (emit->current_node == 3) { error("Too many texture indirections"); return GL_FALSE; } - if (!finish_node(c)) + if (!finish_node(emit)) return GL_FALSE; - struct r300_fragment_program_node *node = &code->node[++code->cur_node]; - node->alu_offset = code->alu.length; - node->alu_end = -1; - node->tex_offset = code->tex.length; - node->tex_end = -1; + emit->current_node++; + emit->node_first_tex = code->tex.length; + emit->node_first_alu = code->alu.length; + emit->node_flags = 0; return GL_TRUE; } @@ -279,7 +305,6 @@ static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* in use_temporary(code, inst->SrcIndex); - code->node[code->cur_node].tex_end++; code->tex.inst[code->tex.length++] = (inst->SrcIndex << R300_SRC_ADDR_SHIFT) | (dest << R300_DST_ADDR_SHIFT) | @@ -302,16 +327,34 @@ static const struct radeon_pair_handler pair_handler = { */ void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler) { + struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; + memset(&emit, 0, sizeof(emit)); + emit.compiler = compiler; + _mesa_bzero(code, sizeof(struct r300_fragment_program_code)); - code->node[0].alu_end = -1; - code->node[0].tex_end = -1; - radeonPairProgram(compiler, &pair_handler, compiler); + radeonPairProgram(compiler, &pair_handler, &emit); if (compiler->Base.Error) return; - finish_node(compiler); + /* Finish the program */ + finish_node(&emit); + + code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + code->code_offset = + (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | + ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | + (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | + ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + + if (emit.current_node < 3) { + int shift = 3 - emit.current_node; + int i; + for(i = 0; i <= emit.current_node; ++i) + code->code_addr[shift + i] = code->code_addr[i]; + for(i = 0; i < shift; ++i) + code->code_addr[i] = 0; + } } - diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index 77bc19d8ff9..6f5bc288316 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -136,18 +136,17 @@ struct r300_fragment_program_code { struct { int length; /**< total # of ALU instructions used */ struct { - uint32_t inst0; - uint32_t inst1; - uint32_t inst2; - uint32_t inst3; + uint32_t rgb_inst; + uint32_t rgb_addr; + uint32_t alpha_inst; + uint32_t alpha_addr; } inst[R300_PFS_MAX_ALU_INST]; } alu; - struct r300_fragment_program_node node[4]; - int cur_node; - int first_node_has_tex; - - int max_temp_idx; + uint32_t config; /* US_CONFIG */ + uint32_t pixsize; /* US_PIXSIZE */ + uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t code_addr[4]; /* US_CODE_ADDR */ }; diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 1f799d5a6ff..1ac14267d5e 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -2035,7 +2035,7 @@ static void r300SetupPixelShader(GLcontext *ctx) r300ContextPtr rmesa = R300_CONTEXT(ctx); struct r300_fragment_program *fp = rmesa->selected_fp; struct r300_fragment_program_code *code; - int i, k; + int i; code = &fp->code.code.r300; @@ -2048,33 +2048,18 @@ static void r300SetupPixelShader(GLcontext *ctx) rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length); rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length); for (i = 0; i < code->alu.length; i++) { - rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst0; - rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst1; - rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst2; - rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].inst3; + rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst; + rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr; + rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst; + rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr; } R300_STATECHANGE(rmesa, fp); - rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->cur_node | (code->first_node_has_tex << 3); - rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->max_temp_idx; - rmesa->hw.fp.cmd[R300_FP_CNTL2] = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); - /* I just want to say, the way these nodes are stored.. weird.. */ - for (i = 0, k = (4 - (code->cur_node + 1)); i < 4; i++, k++) { - if (i < (code->cur_node + 1)) { - rmesa->hw.fp.cmd[R300_FP_NODE0 + k] = - (code->node[i].alu_offset << R300_ALU_START_SHIFT) | - (code->node[i].alu_end << R300_ALU_SIZE_SHIFT) | - (code->node[i].tex_offset << R300_TEX_START_SHIFT) | - (code->node[i].tex_end << R300_TEX_SIZE_SHIFT) | - code->node[i].flags; - } else { - rmesa->hw.fp.cmd[R300_FP_NODE0 + (3 - i)] = 0; - } - } + rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config; + rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize; + rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset; + for (i = 0; i < 4; i++) + rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i]; R300_STATECHANGE(rmesa, fpp); rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4); |