From c9343047cfc44039915e0b09fc94bd992559a982 Mon Sep 17 00:00:00 2001 From: Vadim Girlin Date: Wed, 23 Jan 2013 21:30:02 +0100 Subject: r600g: improve inputs/interpolation handling with llvm backend Get rid of special handling for reserved regs. Use one intrinsic for all kinds of interpolation. v2[Vincent Lejeune]: Rebased against current master Reviewed-by: Tom Stellard Signed-off-by: Vadim Girlin --- src/gallium/drivers/r600/r600_llvm.c | 195 +++++++++++-------------------- src/gallium/drivers/r600/r600_shader.c | 159 ++++++++++++------------- src/gallium/drivers/r600/r600_shader.h | 3 +- src/gallium/drivers/radeon/radeon_llvm.h | 3 +- 4 files changed, 151 insertions(+), 209 deletions(-) (limited to 'src/gallium') diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index 9f8a0954f75..913dccc53a2 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -83,48 +83,40 @@ static LLVMValueRef llvm_fetch_system_value( static LLVMValueRef llvm_load_input_helper( struct radeon_llvm_context * ctx, - const char *intrinsic, unsigned idx) + unsigned idx, int interp, int ij_index) { - LLVMValueRef reg = lp_build_const_int32( - ctx->soa.bld_base.base.gallivm, - idx); - return build_intrinsic( - ctx->soa.bld_base.base.gallivm->builder, - intrinsic, - ctx->soa.bld_base.base.elem_type, &reg, 1, - LLVMReadNoneAttribute); + const struct lp_build_context * bb = &ctx->soa.bld_base.base; + LLVMValueRef arg[2]; + int arg_count; + const char * intrinsic; + + arg[0] = lp_build_const_int32(bb->gallivm, idx); + + if (interp) { + intrinsic = "llvm.R600.interp.input"; + arg[1] = lp_build_const_int32(bb->gallivm, ij_index); + arg_count = 2; + } else { + intrinsic = "llvm.R600.load.input"; + arg_count = 1; + } + + return build_intrinsic(bb->gallivm->builder, intrinsic, + bb->elem_type, &arg[0], arg_count, LLVMReadNoneAttribute); } static LLVMValueRef llvm_face_select_helper( struct radeon_llvm_context * ctx, - const char 
*intrinsic, unsigned face_register, - unsigned frontcolor_register, unsigned backcolor_regiser) + unsigned face_loc, LLVMValueRef front_color, LLVMValueRef back_color) { - - LLVMValueRef backcolor = llvm_load_input_helper( - ctx, - intrinsic, - backcolor_regiser); - LLVMValueRef front_color = llvm_load_input_helper( - ctx, - intrinsic, - frontcolor_register); - LLVMValueRef face = llvm_load_input_helper( - ctx, - "llvm.R600.load.input", - face_register); - LLVMValueRef is_face_positive = LLVMBuildFCmp( - ctx->soa.bld_base.base.gallivm->builder, - LLVMRealUGT, face, - lp_build_const_float(ctx->soa.bld_base.base.gallivm, 0.0f), - ""); - return LLVMBuildSelect( - ctx->soa.bld_base.base.gallivm->builder, - is_face_positive, - front_color, - backcolor, - ""); + const struct lp_build_context * bb = &ctx->soa.bld_base.base; + LLVMValueRef face = llvm_load_input_helper(ctx, face_loc, 0, 0); + LLVMValueRef is_front = LLVMBuildFCmp( + bb->gallivm->builder, LLVMRealUGT, face, + lp_build_const_float(bb->gallivm, 0.0f), ""); + return LLVMBuildSelect(bb->gallivm->builder, is_front, + front_color, back_color, ""); } static void llvm_load_input( @@ -132,110 +124,59 @@ static void llvm_load_input( unsigned input_index, const struct tgsi_full_declaration *decl) { + const struct r600_shader_io * input = &ctx->r600_inputs[input_index]; unsigned chan; - - const char *intrinsics = "llvm.R600.load.input"; - unsigned offset = 4 * ctx->reserved_reg_count; - - if (ctx->type == TGSI_PROCESSOR_FRAGMENT && ctx->chip_class >= EVERGREEN) { - switch (decl->Interp.Interpolate) { - case TGSI_INTERPOLATE_COLOR: - case TGSI_INTERPOLATE_PERSPECTIVE: - offset = 0; - intrinsics = "llvm.R600.load.input.perspective"; - break; - case TGSI_INTERPOLATE_LINEAR: - offset = 0; - intrinsics = "llvm.R600.load.input.linear"; - break; - case TGSI_INTERPOLATE_CONSTANT: - offset = 0; - intrinsics = "llvm.R600.load.input.constant"; - break; - default: - assert(0 && "Unknow Interpolate mode"); - } + unsigned interp = 
0; + int ij_index; + int two_side = (ctx->two_side && input->name == TGSI_SEMANTIC_COLOR); + LLVMValueRef v; + + if (ctx->chip_class >= EVERGREEN && ctx->type == TGSI_PROCESSOR_FRAGMENT && + input->spi_sid) { + interp = 1; + ij_index = (input->interpolate > 0) ? input->ij_index : -1; } for (chan = 0; chan < 4; chan++) { - unsigned soa_index = radeon_llvm_reg_index_soa(input_index, - chan); - - switch (decl->Semantic.Name) { - case TGSI_SEMANTIC_FACE: - ctx->inputs[soa_index] = llvm_load_input_helper(ctx, - "llvm.R600.load.input", - 4 * ctx->face_input); - break; - case TGSI_SEMANTIC_POSITION: - if (ctx->type != TGSI_PROCESSOR_FRAGMENT || chan != 3) { - ctx->inputs[soa_index] = llvm_load_input_helper(ctx, - "llvm.R600.load.input", - soa_index + (ctx->reserved_reg_count * 4)); - } else { - LLVMValueRef w_coord = llvm_load_input_helper(ctx, - "llvm.R600.load.input", - soa_index + (ctx->reserved_reg_count * 4)); - ctx->inputs[soa_index] = LLVMBuildFDiv(ctx->gallivm.builder, - lp_build_const_float(&(ctx->gallivm), 1.0f), w_coord, ""); - } - break; - case TGSI_SEMANTIC_COLOR: - if (ctx->two_side) { - unsigned front_location, back_location; - unsigned back_reg = ctx->r600_inputs[input_index] - .potential_back_facing_reg; - if (ctx->chip_class >= EVERGREEN) { - front_location = 4 * ctx->r600_inputs[input_index].lds_pos + chan; - back_location = 4 * ctx->r600_inputs[back_reg].lds_pos + chan; - } else { - front_location = soa_index + 4 * ctx->reserved_reg_count; - back_location = radeon_llvm_reg_index_soa( - ctx->r600_inputs[back_reg].gpr, - chan); - } - ctx->inputs[soa_index] = llvm_face_select_helper(ctx, - intrinsics, - 4 * ctx->face_input, front_location, back_location); - break; - } - default: - { - unsigned location; - if (ctx->chip_class >= EVERGREEN) { - location = 4 * ctx->r600_inputs[input_index].lds_pos + chan; - } else { - location = soa_index + 4 * ctx->reserved_reg_count; - } - /* The * 4 is assuming that we are in soa mode. 
*/ - ctx->inputs[soa_index] = llvm_load_input_helper(ctx, - intrinsics, location); - - break; - } + unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); + int loc; + + if (interp) { + loc = 4 * input->lds_pos + chan; + } else { + if (input->name == TGSI_SEMANTIC_FACE) + loc = 4 * ctx->face_gpr; + else + loc = 4 * input->gpr + chan; + } + + v = llvm_load_input_helper(ctx, loc, interp, ij_index); + + if (two_side) { + struct r600_shader_io * back_input = + &ctx->r600_inputs[input->back_color_input]; + int back_loc = interp ? back_input->lds_pos : back_input->gpr; + LLVMValueRef v2; + + back_loc = 4 * back_loc + chan; + v2 = llvm_load_input_helper(ctx, back_loc, interp, ij_index); + v = llvm_face_select_helper(ctx, 4 * ctx->face_gpr, v, v2); + } else if (input->name == TGSI_SEMANTIC_POSITION && + ctx->type == TGSI_PROCESSOR_FRAGMENT && chan == 3) { + /* RCP for fragcoord.w */ + v = LLVMBuildFDiv(ctx->gallivm.builder, + lp_build_const_float(&(ctx->gallivm), 1.0f), + v, ""); } + + ctx->inputs[soa_index] = v; } } static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base) { struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); - struct lp_build_context * base = &bld_base->base; - unsigned i; - /* Reserve special input registers */ - for (i = 0; i < ctx->reserved_reg_count; i++) { - unsigned chan; - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef reg_index = lp_build_const_int32( - base->gallivm, - radeon_llvm_reg_index_soa(i, chan)); - lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.reserve.reg", - LLVMVoidTypeInContext(base->gallivm->context), - reg_index); - } - } } static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index c16e467c853..e8992ba5bed 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -650,19 +650,15 @@ static int 
tgsi_is_supported(struct r600_shader_ctx *ctx) return 0; } -static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) +static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx, + int input) { - int i, r; - struct r600_bytecode_alu alu; - int gpr = 0, base_chan = 0; int ij_index = 0; if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_PERSPECTIVE) { - ij_index = 0; if (ctx->shader->input[input].centroid) ij_index++; } else if (ctx->shader->input[input].interpolate == TGSI_INTERPOLATE_LINEAR) { - ij_index = 0; /* if we have perspective add one */ if (ctx->input_perspective) { ij_index++; @@ -674,6 +670,16 @@ static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) ij_index++; } + ctx->shader->input[input].ij_index = ij_index; +} + +static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input) +{ + int i, r; + struct r600_bytecode_alu alu; + int gpr = 0, base_chan = 0; + int ij_index = ctx->shader->input[input].ij_index; + /* work out gpr and base_chan from index */ gpr = ij_index / 2; base_chan = (2 * (ij_index % 2)) + 1; @@ -806,12 +812,13 @@ static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index) if (ctx->shader->input[index].spi_sid) { ctx->shader->input[index].lds_pos = ctx->shader->nlds++; - if (!ctx->use_llvm) { - if (ctx->shader->input[index].interpolate > 0) { + if (ctx->shader->input[index].interpolate > 0) { + evergreen_interp_assign_ij_index(ctx, index); + if (!ctx->use_llvm) r = evergreen_interp_alu(ctx, index); - } else { + } else { + if (!ctx->use_llvm) r = evergreen_interp_flat(ctx, index); - } } } return r; @@ -857,11 +864,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) i = ctx->shader->ninput++; ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; - ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); ctx->shader->input[i].interpolate = d->Interp.Interpolate; ctx->shader->input[i].centroid = 
d->Interp.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); switch (ctx->shader->input[i].name) { case TGSI_SEMANTIC_FACE: ctx->face_gpr = ctx->shader->input[i].gpr; @@ -883,11 +890,11 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) i = ctx->shader->noutput++; ctx->shader->output[i].name = d->Semantic.Name; ctx->shader->output[i].sid = d->Semantic.Index; - ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask = d->Declaration.UsageMask; if (ctx->type == TGSI_PROCESSOR_VERTEX) { + ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); switch (d->Semantic.Name) { case TGSI_SEMANTIC_CLIPDIST: ctx->shader->clip_dist_write |= d->Declaration.UsageMask << (d->Semantic.Index << 2); @@ -1193,17 +1200,9 @@ static int process_twoside_color_inputs(struct r600_shader_ctx *ctx) for (i = 0; i < count; i++) { if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) { - unsigned back_facing_reg = ctx->shader->input[i].potential_back_facing_reg; - if (ctx->bc->chip_class >= EVERGREEN) { - if ((r = evergreen_interp_input(ctx, back_facing_reg))) - return r; - } - - if (!ctx->use_llvm) { - r = select_twoside_color(ctx, i, back_facing_reg); - if (r) - return r; - } + r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input); + if (r) + return r; } } return 0; @@ -1396,7 +1395,11 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, // TGSI to LLVM needs to know the lds position of inputs. 
// Non LLVM path computes it later (in process_twoside_color) ctx.shader->input[ni].lds_pos = next_lds_loc++; - ctx.shader->input[i].potential_back_facing_reg = ni; + ctx.shader->input[i].back_color_input = ni; + if (ctx.bc->chip_class >= EVERGREEN) { + if ((r = evergreen_interp_input(&ctx, ni))) + return r; + } } } } @@ -1408,10 +1411,9 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, LLVMModuleRef mod; unsigned dump = 0; memset(&radeon_llvm_ctx, 0, sizeof(radeon_llvm_ctx)); - radeon_llvm_ctx.reserved_reg_count = ctx.file_offset[TGSI_FILE_INPUT]; radeon_llvm_ctx.type = ctx.type; radeon_llvm_ctx.two_side = shader->two_side; - radeon_llvm_ctx.face_input = ctx.face_gpr; + radeon_llvm_ctx.face_gpr = ctx.face_gpr; radeon_llvm_ctx.r600_inputs = ctx.shader->input; radeon_llvm_ctx.r600_outputs = ctx.shader->output; radeon_llvm_ctx.color_buffer_count = MAX2(key.nr_cbufs , 1); @@ -1442,9 +1444,24 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, if (shader->fs_write_all && rscreen->chip_class >= EVERGREEN) shader->nr_ps_max_color_exports = 8; - if (ctx.fragcoord_input >= 0 && !use_llvm) { - if (ctx.bc->chip_class == CAYMAN) { - for (j = 0 ; j < 4; j++) { + if (!use_llvm) { + if (ctx.fragcoord_input >= 0) { + if (ctx.bc->chip_class == CAYMAN) { + for (j = 0 ; j < 4; j++) { + struct r600_bytecode_alu alu; + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); + alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; + alu.src[0].chan = 3; + + alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; + alu.dst.chan = j; + alu.dst.write = (j == 3); + alu.last = 1; + if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + return r; + } + } else { struct r600_bytecode_alu alu; memset(&alu, 0, sizeof(struct r600_bytecode_alu)); alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); @@ -1452,65 +1469,49 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen, 
alu.src[0].chan = 3; alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = j; - alu.dst.write = (j == 3); + alu.dst.chan = 3; + alu.dst.write = 1; alu.last = 1; if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) return r; } - } else { - struct r600_bytecode_alu alu; - memset(&alu, 0, sizeof(struct r600_bytecode_alu)); - alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE); - alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; - alu.src[0].chan = 3; + } - alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; - alu.dst.chan = 3; - alu.dst.write = 1; - alu.last = 1; - if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) + if (shader->two_side && ctx.colors_used) { + if ((r = process_twoside_color_inputs(&ctx))) return r; } - } - - if (shader->two_side && ctx.colors_used) { - if ((r = process_twoside_color_inputs(&ctx))) - return r; - } - tgsi_parse_init(&ctx.parse, tokens); - while (!tgsi_parse_end_of_tokens(&ctx.parse)) { - tgsi_parse_token(&ctx.parse); - switch (ctx.parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (use_llvm) { - continue; + tgsi_parse_init(&ctx.parse, tokens); + while (!tgsi_parse_end_of_tokens(&ctx.parse)) { + tgsi_parse_token(&ctx.parse); + switch (ctx.parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + r = tgsi_is_supported(&ctx); + if (r) + goto out_err; + ctx.max_driver_temp_used = 0; + /* reserve first tmp for everyone */ + r600_get_temp(&ctx); + + opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; + if ((r = tgsi_split_constant(&ctx))) + goto out_err; + if ((r = tgsi_split_literal_constant(&ctx))) + goto out_err; + if (ctx.bc->chip_class == CAYMAN) + ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; + else if (ctx.bc->chip_class >= EVERGREEN) + ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; + else + ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; + r = ctx.inst_info->process(&ctx); + if (r) + goto out_err; + break; + default: + break; } - r = 
tgsi_is_supported(&ctx); - if (r) - goto out_err; - ctx.max_driver_temp_used = 0; - /* reserve first tmp for everyone */ - r600_get_temp(&ctx); - - opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; - if ((r = tgsi_split_constant(&ctx))) - goto out_err; - if ((r = tgsi_split_literal_constant(&ctx))) - goto out_err; - if (ctx.bc->chip_class == CAYMAN) - ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; - else if (ctx.bc->chip_class >= EVERGREEN) - ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; - else - ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; - r = ctx.inst_info->process(&ctx); - if (r) - goto out_err; - break; - default: - break; } } diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h index d61efcb1a72..f55e002aea6 100644 --- a/src/gallium/drivers/r600/r600_shader.h +++ b/src/gallium/drivers/r600/r600_shader.h @@ -32,9 +32,10 @@ struct r600_shader_io { int sid; int spi_sid; unsigned interpolate; + unsigned ij_index; boolean centroid; unsigned lds_pos; /* for evergreen */ - unsigned potential_back_facing_reg; + unsigned back_color_input; unsigned write_mask; }; diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 1edcbd46d72..64d838ab46b 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -56,7 +56,7 @@ struct radeon_llvm_context { unsigned chip_class; unsigned type; - unsigned face_input; + unsigned face_gpr; unsigned two_side; unsigned clip_vertex; struct r600_shader_io * r600_inputs; @@ -108,7 +108,6 @@ struct radeon_llvm_context { LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; - unsigned reserved_reg_count; /*=== Private Members ===*/ struct radeon_llvm_branch branch[RADEON_LLVM_MAX_BRANCH_DEPTH]; -- cgit v1.2.3