diff options
Diffstat (limited to 'src/gallium/drivers')
149 files changed, 2767 insertions, 2802 deletions
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c index 94c0c69fcae..def9a03d377 100644 --- a/src/gallium/drivers/i915/i915_fpc_translate.c +++ b/src/gallium/drivers/i915/i915_fpc_translate.c @@ -512,6 +512,22 @@ i915_translate_instruction(struct i915_fp_compile *p, emit_simple_arith(p, inst, A0_ADD, 2, fs); break; + case TGSI_OPCODE_CEIL: + src0 = src_vector(p, &inst->Src[0], fs); + tmp = i915_get_utemp(p); + flags = get_result_flags(inst); + i915_emit_arith(p, + A0_FLR, + tmp, + flags & A0_DEST_CHANNEL_ALL, 0, + negate(src0, 1, 1, 1, 1), 0, 0); + i915_emit_arith(p, + A0_MOV, + get_result_vector(p, &inst->Dst[0]), + flags, 0, + negate(tmp, 1, 1, 1, 1), 0, 0); + break; + case TGSI_OPCODE_CMP: src0 = src_vector(p, &inst->Src[0], fs); src1 = src_vector(p, &inst->Src[1], fs); diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h index f82ae30bb7d..c0c95a27129 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h @@ -64,12 +64,14 @@ lp_build_blend_func(struct lp_build_context *bld, LLVMValueRef lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, + const enum pipe_format *cbuf_format, struct lp_type type, unsigned rt, LLVMValueRef src, LLVMValueRef dst, + LLVMValueRef mask, LLVMValueRef const_, - unsigned alpha_swizzle); + const unsigned char swizzle[4]); void diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c index c342346a36e..59d5f545966 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c @@ -45,12 +45,14 @@ #include "pipe/p_state.h" #include "util/u_debug.h" +#include "util/u_format.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_const.h" #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_swizzle.h" +#include "gallivm/lp_bld_bitarit.h" #include "gallivm/lp_bld_debug.h" #include "lp_bld_blend.h" @@ -300,25 +302,39 @@ lp_build_blend_func(struct lp_build_context *bld, } +/** + * Performs blending of src and dst pixels + * + * @param blend the blend state of the shader variant + * @param cbuf_format format of the colour buffer + * @param type data type of the pixel vector + * @param rt rt number + * @param src blend src + * @param dst blend dst + * @param mask optional mask to apply to the blending result + * @param const_ const blend color + * @param swizzle swizzle values for RGBA + * + * @return the result of blending src and dst + */ LLVMValueRef lp_build_blend_aos(struct gallivm_state *gallivm, const struct pipe_blend_state *blend, + const enum pipe_format *cbuf_format, struct lp_type type, unsigned rt, LLVMValueRef src, LLVMValueRef dst, + LLVMValueRef mask, LLVMValueRef const_, - unsigned alpha_swizzle) + const unsigned char swizzle[4]) { struct lp_build_blend_aos_context bld; LLVMValueRef src_term; LLVMValueRef dst_term; - - /* FIXME: color masking not implemented yet */ - assert(blend->rt[rt].colormask == 0xf); - - if(!blend->rt[rt].blend_enable) - return src; + LLVMValueRef result; + unsigned alpha_swizzle = swizzle[3]; + boolean fullcolormask; /* Setup build context */ memset(&bld, 0, sizeof bld); @@ -327,30 +343,59 @@ lp_build_blend_aos(struct gallivm_state *gallivm, bld.dst = dst; bld.const_ = const_; - /* TODO: There are still a few optimization opportunities here. For certain - * combinations it is possible to reorder the operations and therefore saving - * some instructions. */ + if (!blend->rt[rt].blend_enable) { + result = src; + } else { + + /* TODO: There are still a few optimization opportunities here. For certain + * combinations it is possible to reorder the operations and therefore saving + * some instructions. */ + + src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, + blend->rt[rt].alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, + blend->rt[rt].alpha_dst_factor, alpha_swizzle); + + lp_build_name(src_term, "src_term"); + lp_build_name(dst_term, "dst_term"); - src_term = lp_build_blend_factor(&bld, src, blend->rt[rt].rgb_src_factor, - blend->rt[rt].alpha_src_factor, alpha_swizzle); - dst_term = lp_build_blend_factor(&bld, dst, blend->rt[rt].rgb_dst_factor, - blend->rt[rt].alpha_dst_factor, alpha_swizzle); + if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { + result = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + } + else { + /* Seperate RGB / A functions */ + + LLVMValueRef rgb; + LLVMValueRef alpha; - lp_build_name(src_term, "src_term"); - lp_build_name(dst_term, "dst_term"); + rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); - if(blend->rt[rt].rgb_func == blend->rt[rt].alpha_func) { - return lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); + result = lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + } } - else { - /* Seperate RGB / A functions */ - LLVMValueRef rgb; - LLVMValueRef alpha; + /* Check if color mask is necessary */ + fullcolormask = util_format_colormask_full(util_format_description(cbuf_format[rt]), blend->rt[rt].colormask); + + if (!fullcolormask) { + LLVMValueRef color_mask; - rgb = lp_build_blend_func(&bld.base, blend->rt[rt].rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&bld.base, blend->rt[rt].alpha_func, src_term, dst_term); + color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, blend->rt[rt].colormask, swizzle); + lp_build_name(color_mask, "color_mask"); - return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); + /* Combine with input mask if necessary */ + if (mask) { + mask = lp_build_and(&bld.base, color_mask, mask); + } else { + mask = color_mask; + } + } + + /* Apply mask, if one exists */ + if (mask) { + result = lp_build_select(&bld.base, mask, result, dst); } + + return result; } diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 71d0ddf5e75..230b80a945f 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -290,6 +290,10 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); rej4 = _mm_slli_epi32(rej4, 2); + /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ + c = _mm_sub_epi32(c, _mm_set1_epi32(1)); + rej4 = _mm_add_epi32(rej4, _mm_set1_epi32(1)); + dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); @@ -383,7 +387,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, __m128i span_1; /* 0,dcdx,2dcdx,3dcdx for plane 1 */ __m128i span_2; /* 0,dcdx,2dcdx,3dcdx for plane 2 */ __m128i unused; - + transpose4_epi32(&p0, &p1, &p2, &zero, &c, &dcdx, &dcdy, &unused); @@ -394,6 +398,9 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, c = _mm_add_epi32(c, mm_mullo_epi32(dcdx, _mm_set1_epi32(x))); c = _mm_add_epi32(c, mm_mullo_epi32(dcdy, _mm_set1_epi32(y))); + /* Adjust so we can just check the sign bit (< 0 comparison), instead of having to do a less efficient <= 0 comparison */ + c = _mm_sub_epi32(c, _mm_set1_epi32(1)); + dcdx2 = _mm_add_epi32(dcdx, dcdx); dcdx3 = _mm_add_epi32(dcdx2, dcdx); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index b50c354fa9b..26d35debdaf 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -271,15 +271,13 @@ do_triangle_ccw(struct lp_setup_context *setup, */ int adj = (setup->pixel_offset != 0) ? 1 : 0; - bbox.x0 = (MIN3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.x1 = (MAX3(x[0], x[1], x[2]) + (FIXED_ONE-1)) >> FIXED_ORDER; - bbox.y0 = (MIN3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; - bbox.y1 = (MAX3(y[0], y[1], y[2]) + (FIXED_ONE-1) + adj) >> FIXED_ORDER; + /* Inclusive x0, exclusive x1 */ + bbox.x0 = MIN3(x[0], x[1], x[2]) >> FIXED_ORDER; + bbox.x1 = (MAX3(x[0], x[1], x[2]) - 1) >> FIXED_ORDER; - /* Inclusive coordinates: - */ - bbox.x1--; - bbox.y1--; + /* Inclusive / exclusive depending upon adj (bottom-left or top-right) */ + bbox.y0 = (MIN3(y[0], y[1], y[2]) + adj) >> FIXED_ORDER; + bbox.y1 = (MAX3(y[0], y[1], y[2]) - 1 + adj) >> FIXED_ORDER; } if (bbox.x1 < bbox.x0 || diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index ec94190649c..2d2391e908c 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -924,6 +924,7 @@ generate_variant(struct llvmpipe_context *lp, const struct lp_fragment_shader_variant_key *key) { struct lp_fragment_shader_variant *variant; + const struct util_format_description *cbuf0_format_desc; boolean fullcolormask; variant = CALLOC_STRUCT(lp_fragment_shader_variant); @@ -942,12 +943,8 @@ generate_variant(struct llvmpipe_context *lp, */ fullcolormask = FALSE; if (key->nr_cbufs == 1) { - const struct util_format_description *format_desc; - format_desc = util_format_description(key->cbuf_format[0]); - if ((~key->blend.rt[0].colormask & - util_format_colormask(format_desc)) == 0) { - fullcolormask = TRUE; - } + cbuf0_format_desc = util_format_description(key->cbuf_format[0]); + fullcolormask = util_format_colormask_full(cbuf0_format_desc, key->blend.rt[0].colormask); } variant->opaque = diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index b3ca134131d..51324cbb6a3 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -179,7 +179,9 @@ add_blend_test(struct gallivm_state *gallivm, LLVMValueRef res_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; + const enum pipe_format format = PIPE_FORMAT_R8G8B8A8_UNORM; const unsigned rt = 0; + const unsigned char swizzle[4] = { 0, 1, 2, 3 }; vec_type = lp_build_vec_type(gallivm, type); @@ -205,7 +207,7 @@ add_blend_test(struct gallivm_state *gallivm, dst = LLVMBuildLoad(builder, dst_ptr, "dst"); con = LLVMBuildLoad(builder, const_ptr, "const"); - res = lp_build_blend_aos(gallivm, blend, type, rt, src, dst, con, 3); + res = lp_build_blend_aos(gallivm, blend, &format, type, rt, src, dst, NULL, con, swizzle); lp_build_name(res, "res"); diff --git a/src/gallium/drivers/nouveau/nouveau_buffer.c b/src/gallium/drivers/nouveau/nouveau_buffer.c index ff199debd74..936e2bf246a 100644 --- a/src/gallium/drivers/nouveau/nouveau_buffer.c +++ b/src/gallium/drivers/nouveau/nouveau_buffer.c @@ -523,8 +523,10 @@ nouveau_scratch_runout_release(struct nouveau_context *nv) { if (!nv->scratch.nr_runout) return; - while (nv->scratch.nr_runout--) + do { + --nv->scratch.nr_runout; nouveau_bo_ref(NULL, &nv->scratch.runout[nv->scratch.nr_runout]); + } while (nv->scratch.nr_runout); FREE(nv->scratch.runout); nv->scratch.end = 0; diff --git a/src/gallium/drivers/nv30/nvfx_fragprog.c b/src/gallium/drivers/nv30/nvfx_fragprog.c index e562b454f92..592ad21c6c8 100644 --- a/src/gallium/drivers/nv30/nvfx_fragprog.c +++ b/src/gallium/drivers/nv30/nvfx_fragprog.c @@ -535,6 +535,11 @@ nvfx_fragprog_parse_instruction(struct nv30_context* nvfx, struct nvfx_fpc *fpc, case TGSI_OPCODE_ADD: nvfx_fp_emit(fpc, arith(sat, ADD, dst, mask, src[0], src[1], none)); break; + case TGSI_OPCODE_CEIL: + tmp = nvfx_src(temp(fpc)); + nvfx_fp_emit(fpc, arith(0, FLR, tmp.reg, mask, neg(src[0]), none, none)); + nvfx_fp_emit(fpc, arith(sat, MOV, dst, mask, neg(tmp), none, none)); + break; case TGSI_OPCODE_CMP: insn = arith(0, MOV, none.reg, mask, src[0], none, none); insn.cc_update = 1; diff --git a/src/gallium/drivers/nv30/nvfx_vertprog.c b/src/gallium/drivers/nv30/nvfx_vertprog.c index d7eb9fb0a63..82972b3943c 100644 --- a/src/gallium/drivers/nv30/nvfx_vertprog.c +++ b/src/gallium/drivers/nv30/nvfx_vertprog.c @@ -550,6 +550,11 @@ nvfx_vertprog_parse_instruction(struct nv30_context *nv30, struct nvfx_vpc *vpc, case TGSI_OPCODE_ARL: nvfx_vp_emit(vpc, arith(0, VEC, ARL, dst, mask, src[0], none, none)); break; + case TGSI_OPCODE_CEIL: + tmp = nvfx_src(temp(vpc)); + nvfx_vp_emit(vpc, arith(0, VEC, FLR, tmp.reg, mask, neg(src[0]), none, none)); + nvfx_vp_emit(vpc, arith(sat, VEC, MOV, dst, mask, neg(tmp), none, none)); + break; case TGSI_OPCODE_CMP: insn = arith(0, VEC, MOV, none.reg, mask, src[0], none, none); insn.cc_update = 1; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp index f7dac25c116..f713e6391c6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.cpp @@ -278,6 +278,31 @@ BuildUtil::mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc) return mkOp2(OP_UNION, typeOfSize(dst->reg.size), dst, def0, def1); } +Instruction * +BuildUtil::mkSplit(Value *h[2], uint8_t halfSize, Value *val) +{ + Instruction *insn = NULL; + + const DataType fTy = typeOfSize(halfSize * 2); + + if (val->reg.file == FILE_IMMEDIATE) + val = mkMov(getSSA(halfSize * 2), val, fTy)->getDef(0); + + if (isMemoryFile(val->reg.file)) { + h[0] = cloneShallow(getFunction(), val); + h[1] = cloneShallow(getFunction(), val); + h[0]->reg.size = halfSize; + h[1]->reg.size = halfSize; + h[1]->reg.data.offset += halfSize; + } else { + h[0] = getSSA(halfSize, val->reg.file); + h[1] = getSSA(halfSize, val->reg.file); + insn = mkOp1(OP_SPLIT, fTy, h[0], val); + insn->setDef(1, h[1]); + } + return insn; +} + FlowInstruction * BuildUtil::mkFlow(operation op, void *targ, CondCode cc, Value *pred) { diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h index 9ee04dbcd12..dd7e491cb5c 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_build_util.h @@ -81,6 +81,8 @@ public: Instruction *mkSelect(Value *pred, Value *dst, Value *trSrc, Value *flSrc); + Instruction *mkSplit(Value *half[2], uint8_t halfSize, Value *); + void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2); ImmediateValue *mkImm(float); diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp index 82e23602ca0..16f191da159 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_from_tgsi.cpp @@ -347,6 +347,7 @@ static nv50_ir::TexTarget translateTexture(uint tex) NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); + NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); case TGSI_TEXTURE_UNKNOWN: default: @@ -548,7 +549,7 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); NV50_IR_OPCODE_CASE(GATHER4, TXG); - NV50_IR_OPCODE_CASE(RESINFO, TXQ); + NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); NV50_IR_OPCODE_CASE(END, EXIT); @@ -597,8 +598,8 @@ public: int clipVertexOutput; - uint8_t *resourceTargets; // TGSI_TEXTURE_* - unsigned resourceCount; + uint8_t *samplerViewTargets; // TGSI_TEXTURE_* + unsigned samplerViewCount; private: int inferSysValDirection(unsigned sn) const; @@ -617,7 +618,7 @@ Source::Source(struct nv50_ir_prog_info *prog) : info(prog) if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) tgsi_dump(tokens, 0); - resourceTargets = NULL; + samplerViewTargets = NULL; mainTempsInLMem = FALSE; } @@ -632,8 +633,8 @@ Source::~Source() if (info->immd.type) FREE(info->immd.type); - if (resourceTargets) - delete[] resourceTargets; + if (samplerViewTargets) + delete[] samplerViewTargets; } bool Source::scanSource() @@ -650,8 +651,8 @@ bool Source::scanSource() clipVertexOutput = -1; - resourceCount = scan.file_max[TGSI_FILE_RESOURCE] + 1; - resourceTargets = new uint8_t[resourceCount]; + samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + samplerViewTargets = new uint8_t[samplerViewCount]; info->immd.bufSize = 0; tempArrayCount = 0; @@ -805,7 +806,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) info->in[i].si = si; if (info->type == PIPE_SHADER_FRAGMENT) { // translate interpolation mode - switch (decl->Declaration.Interpolate) { + switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_CONSTANT: info->in[i].flat = 1; break; @@ -818,7 +819,7 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) default: break; } - if (decl->Declaration.Centroid) + if (decl->Interp.Centroid) info->in[i].centroid = 1; } } @@ -874,9 +875,9 @@ bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) info->sv[i].input = inferSysValDirection(sn); } break; - case TGSI_FILE_RESOURCE: + case TGSI_FILE_SAMPLER_VIEW: for (i = first; i <= last; ++i) - resourceTargets[i] = decl->Resource.Resource; + samplerViewTargets[i] = decl->SamplerView.Resource; break; case TGSI_FILE_IMMEDIATE_ARRAY: { @@ -1000,13 +1001,15 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst) nv50_ir::TexInstruction::Target Instruction::getTexture(const tgsi::Source *code, int s) const { - if (insn->Instruction.Texture) { - return translateTexture(insn->Texture.Texture); - } else { + switch (getSrc(s).getFile()) { + case TGSI_FILE_SAMPLER_VIEW: { // XXX: indirect access unsigned int r = getSrc(s).getIndex(0); - assert(r < code->resourceCount); - return translateTexture(code->resourceTargets[r]); + assert(r < code->samplerViewCount); + return translateTexture(code->samplerViewTargets[r]); + } + default: + return translateTexture(insn->Texture.Texture); } } @@ -2042,7 +2045,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) handleTXF(dst0, 1); break; case TGSI_OPCODE_TXQ: - case TGSI_OPCODE_RESINFO: + case TGSI_OPCODE_SVIEWINFO: handleTXQ(dst0, TXQ_DIMS); break; case TGSI_OPCODE_F2I: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp index 27373b4cc47..16bba0e1723 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_lowering_nv50.cpp @@ -57,15 +57,17 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) Instruction *i[9]; - Value *a[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) }; - Value *b[2] = { bld->getSSA(halfSize), bld->getSSA(halfSize) }; + bld->setPosition(mul, true); + + Value *a[2], *b[2]; Value *c[2]; Value *t[4]; for (int j = 0; j < 4; ++j) t[j] = bld->getSSA(fullSize); - (i[0] = bld->mkOp1(OP_SPLIT, fTy, a[0], mul->getSrc(0)))->setDef(1, a[1]); - (i[1] = bld->mkOp1(OP_SPLIT, fTy, b[0], mul->getSrc(1)))->setDef(1, b[1]); + // split sources into halves + i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0)); + i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1)); i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]); i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]); @@ -96,7 +98,8 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul) delete_Instruction(bld->getProgram(), mul); for (int j = 2; j <= (highResult ? 5 : 4); ++j) - i[j]->sType = hTy; + if (i[j]) + i[j]->sType = hTy; return true; } @@ -518,7 +521,6 @@ private: bool handleEXPORT(Instruction *); - bool handleMUL(Instruction *); bool handleDIV(Instruction *); bool handleSQRT(Instruction *); bool handlePOW(Instruction *); @@ -587,7 +589,8 @@ NV50LoweringPreSSA::handleTEX(TexInstruction *i) if (i->tex.target.isArray()) { Value *layer = i->getSrc(arg - 1); LValue *src = new_LValue(func, FILE_GPR); - bld.mkCvt(OP_CVT, TYPE_U16, src, TYPE_F32, layer); + bld.mkCvt(OP_CVT, TYPE_U32, src, TYPE_F32, layer); + bld.mkOp2(OP_MIN, TYPE_U32, src, src, bld.loadImm(NULL, 511)); i->setSrc(arg - 1, src); if (i->tex.target.isCube()) { @@ -941,14 +944,6 @@ NV50LoweringPreSSA::handleRDSV(Instruction *i) } bool -NV50LoweringPreSSA::handleMUL(Instruction *i) -{ - if (!isFloatType(i->dType) && typeSizeof(i->sType) > 2) - return expandIntegerMUL(&bld, i); - return true; -} - -bool NV50LoweringPreSSA::handleDIV(Instruction *i) { if (!isFloatType(i->dType)) @@ -1068,8 +1063,6 @@ NV50LoweringPreSSA::visit(Instruction *i) return handleSELP(i); case OP_POW: return handlePOW(i); - case OP_MUL: - return handleMUL(i); case OP_DIV: return handleDIV(i); case OP_SQRT: diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp index db5195cd582..10382d9cac6 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_peephole.cpp @@ -564,7 +564,7 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, insn = mul2->getSrc(t)->getInsn(); if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) mul1 = insn; - if (mul1) { + if (mul1 && !mul1->saturate) { int s1; if (mul1->src(s1 = 0).getImmediate(imm1) || @@ -584,10 +584,11 @@ ConstantFolding::tryCollapseChainedMULs(Instruction *mul2, if (f < 0) mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); } + mul1->saturate = mul2->saturate; return; } } - if (mul2->getDef(0)->refCount() == 1) { + if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { // b = mul a, imm // d = mul b, c -> d = mul_x_imm a, c int s2, t2; diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp index 77edaa6067a..726331e91e7 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_ra.cpp @@ -1819,8 +1819,8 @@ RegAlloc::InsertConstraintsPass::texConstraintNVE0(TexInstruction *tex) int n = tex->srcCount(0xff, true); if (n > 4) { condenseSrcs(tex, 0, 3); - if (n > 5) - condenseSrcs(tex, 4, n - 1); + if (n > 5) // NOTE: first call modified positions already + condenseSrcs(tex, 4 - (4 - 1), n - 1 - (4 - 1)); } else if (n > 1) { condenseSrcs(tex, 0, n - 1); @@ -1850,8 +1850,8 @@ RegAlloc::InsertConstraintsPass::texConstraintNVC0(TexInstruction *tex) if (s > 1) condenseSrcs(tex, 0, s - 1); - if (n > 1) - condenseSrcs(tex, s, s + (n - 1)); + if (n > 1) // NOTE: first call modified positions already + condenseSrcs(tex, 1, n); condenseDefs(tex); } diff --git a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp index 5e541e514cb..8b11c6a2fdd 100644 --- a/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp +++ b/src/gallium/drivers/nv50/codegen/nv50_ir_target_nv50.cpp @@ -310,7 +310,22 @@ TargetNV50::insnCanLoad(const Instruction *i, int s, return false; } - if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * typeSizeof(ld->dType))) + uint8_t ldSize; + + if ((i->op == OP_MUL || i->op == OP_MAD) && !isFloatType(i->dType)) { + // 32-bit MUL will be split into 16-bit MULs + if (ld->src(0).isIndirect(0)) + return false; + if (sf == FILE_IMMEDIATE) + return false; + ldSize = 2; + } else { + ldSize = typeSizeof(ld->dType); + } + + if (ldSize < 4 && sf == FILE_SHADER_INPUT) // no < 4-byte aligned a[] access + return false; + if (ld->getSrc(0)->reg.data.offset > (int32_t)(127 * ldSize)) return false; if (ld->src(0).isIndirect(0)) { diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 1cee0e06c02..44a0ba0f561 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -48,6 +48,7 @@ #define NV50_NEW_CONSTBUF (1 << 18) #define NV50_NEW_TEXTURES (1 << 19) #define NV50_NEW_SAMPLERS (1 << 20) +#define NV50_NEW_STRMOUT (1 << 21) #define NV50_NEW_CONTEXT (1 << 31) #define NV50_BIND_FB 0 @@ -56,9 +57,10 @@ #define NV50_BIND_INDEX 3 #define NV50_BIND_TEXTURES 4 #define NV50_BIND_CB(s, i) (5 + 16 * (s) + (i)) -#define NV50_BIND_SCREEN 53 -#define NV50_BIND_TLS 54 -#define NV50_BIND_COUNT 55 +#define NV50_BIND_SO 53 +#define NV50_BIND_SCREEN 54 +#define NV50_BIND_TLS 55 +#define NV50_BIND_COUNT 56 #define NV50_BIND_2D 0 #define NV50_BIND_M2MF 0 #define NV50_BIND_FENCE 1 @@ -92,11 +94,13 @@ struct nv50_context { boolean point_sprite; boolean rt_serialize; boolean flushed; + boolean rasterizer_discard; uint8_t tls_required; uint8_t num_vtxbufs; uint8_t num_vtxelts; uint8_t num_textures[3]; uint8_t num_samplers[3]; + uint8_t prim_size; uint16_t scissor; } state; @@ -126,6 +130,10 @@ struct nv50_context { struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; + uint8_t num_so_targets; + uint8_t so_targets_dirty; + struct pipe_stream_output_target *so_target[4]; + struct pipe_framebuffer_state framebuffer; struct pipe_blend_color blend_colour; struct pipe_stencil_ref stencil_ref; @@ -168,6 +176,14 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *); /* nv50_query.c */ void nv50_init_query_functions(struct nv50_context *); +void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, + struct pipe_query *, unsigned result_offset); +void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *); +void nva0_so_target_save_offset(struct pipe_context *, + struct pipe_stream_output_target *, + unsigned index, boolean seralize); + +#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) /* nv50_shader_state.c */ void nv50_vertprog_validate(struct nv50_context *); @@ -177,6 +193,7 @@ void nv50_fp_linkage_validate(struct nv50_context *); void nv50_gp_linkage_validate(struct nv50_context *); void nv50_constbufs_validate(struct nv50_context *); void nv50_validate_derived_rs(struct nv50_context *); +void nv50_stream_output_validate(struct nv50_context *); /* nv50_state.c */ extern void nv50_init_state_functions(struct nv50_context *); diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 1b2e2934b79..ca40ac2dd43 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -68,6 +68,17 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) break; } } + + /* + * Corner case: VP has no inputs, but we will still need to submit data to + * draw it. HW will shout at us and won't draw anything if we don't enable + * any input, so let's just pretend it's the first one. + */ + if (prog->vp.attrs[0] == 0 && + prog->vp.attrs[1] == 0 && + prog->vp.attrs[2] == 0) + prog->vp.attrs[0] |= 0xf; + /* VertexID before InstanceID */ if (info->io.vertexId < info->numSysVals) info->sv[info->io.vertexId].slot[0] = n++; @@ -235,6 +246,59 @@ nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) } } +static struct nv50_stream_output_state * +nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, + const struct pipe_stream_output_info *pso) +{ + struct nv50_stream_output_state *so; + unsigned b, i, c; + unsigned base[4]; + + so = MALLOC_STRUCT(nv50_stream_output_state); + if (!so) + return NULL; + memset(so->map, 0xff, sizeof(so->map)); + + for (b = 0; b < 4; ++b) + so->num_attribs[b] = 0; + for (i = 0; i < pso->num_outputs; ++i) { + unsigned end = pso->output[i].dst_offset + pso->output[i].num_components; + b = pso->output[i].output_buffer; + assert(b < 4); + so->num_attribs[b] = MAX2(so->num_attribs[b], end); + } + + so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED; + + so->stride[0] = pso->stride[0] * 4; + base[0] = 0; + for (b = 1; b < 4; ++b) { + assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]); + so->stride[b] = so->num_attribs[b] * 4; + if (so->num_attribs[b]) + so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT; + base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4); + } + if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) { + assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX); + so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT; + } + + so->map_size = base[3] + so->num_attribs[3]; + + for (i = 0; i < pso->num_outputs; ++i) { + const unsigned s = pso->output[i].start_component; + const unsigned p = pso->output[i].dst_offset; + const unsigned r = pso->output[i].register_index; + b = pso->output[i].output_buffer; + + for (c = 0; c < pso->output[i].num_components; ++c) + so->map[base[b] + p + c] = info->out[r].slot[s + c]; + } + + return so; +} + boolean nv50_program_translate(struct nv50_program *prog, uint16_t chipset) { @@ -293,6 +357,10 @@ nv50_program_translate(struct nv50_program *prog, uint16_t chipset) prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; } + if (prog->pipe.stream_output.num_outputs) + prog->so = nv50_program_create_strmout_state(info, + &prog->pipe.stream_output); + out: FREE(info); return !ret; diff --git a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h index 92361ad9946..f56268b5439 100644 --- a/src/gallium/drivers/nv50/nv50_program.h +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -42,6 +42,15 @@ struct nv50_varying { ubyte si; /* semantic index */ }; +struct nv50_stream_output_state +{ + uint32_t ctrl; + uint16_t stride[4]; + uint8_t num_attribs[4]; + uint8_t map_size; + uint8_t map[128]; +}; + struct nv50_program { struct pipe_shader_state pipe; @@ -88,6 +97,8 @@ struct nv50_program { void *fixups; /* relocation records */ struct nouveau_heap *mem; + + struct nv50_stream_output_state *so; }; boolean nv50_program_translate(struct nv50_program *, uint16_t chipset); diff --git a/src/gallium/drivers/nv50/nv50_push.c b/src/gallium/drivers/nv50/nv50_push.c index 04e32b7e8b9..3abe189e7b5 100644 --- a/src/gallium/drivers/nv50/nv50_push.c +++ b/src/gallium/drivers/nv50/nv50_push.c @@ -210,7 +210,8 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) { struct push_context ctx; unsigned i, index_size; - unsigned inst = info->instance_count; + unsigned inst_count = info->instance_count; + unsigned vert_count = info->count; boolean apply_bias = info->indexed && info->index_bias; ctx.push = nv50->base.pushbuf; @@ -242,6 +243,17 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) ctx.primitive_restart = info->primitive_restart; ctx.restart_index = info->restart_index; } else { + if (unlikely(info->count_from_stream_output)) { + struct pipe_context *pipe = &nv50->base.pipe; + struct nv50_so_target *targ; + targ = nv50_so_target(info->count_from_stream_output); + if (!targ->pq) { + NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n"); + return; + } + pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count); + vert_count /= targ->stride; + } ctx.idxbuf = NULL; index_size = 0; ctx.primitive_restart = FALSE; @@ -262,21 +274,21 @@ nv50_push_vbo(struct nv50_context *nv50, const struct pipe_draw_info *info) } nv50->state.prim_restart = info->primitive_restart; - while (inst--) { + while (inst_count--) { BEGIN_NV04(ctx.push, NV50_3D(VERTEX_BEGIN_GL), 1); PUSH_DATA (ctx.push, ctx.prim); switch (index_size) { case 0: - emit_vertices_seq(&ctx, info->start, info->count); + emit_vertices_seq(&ctx, info->start, vert_count); break; case 1: - emit_vertices_i08(&ctx, info->start, info->count); + emit_vertices_i08(&ctx, info->start, vert_count); break; case 2: - emit_vertices_i16(&ctx, info->start, info->count); + emit_vertices_i16(&ctx, info->start, vert_count); break; case 4: - emit_vertices_i32(&ctx, info->start, info->count); + emit_vertices_i32(&ctx, info->start, vert_count); break; default: assert(0); diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index 5275e74964a..8e62c5f11bc 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -36,7 +36,8 @@ struct nv50_query { uint32_t *data; - uint32_t type; + uint16_t type; + uint16_t index; uint32_t sequence; struct nouveau_bo *bo; uint32_t base; @@ -170,21 +171,15 @@ nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); PUSH_DATA (push, 1); break; - case PIPE_QUERY_PRIMITIVES_GENERATED: /* store before & after instead ? */ - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + case PIPE_QUERY_PRIMITIVES_GENERATED: + nv50_query_get(push, q, 0x10, 0x06805002); break; case PIPE_QUERY_PRIMITIVES_EMITTED: - PUSH_SPACE(push, 2); - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK); + nv50_query_get(push, q, 0x10, 0x05805002); break; case PIPE_QUERY_SO_STATISTICS: - PUSH_SPACE(push, 3); - BEGIN_NI04(push, NV50_3D(COUNTER_RESET), 2); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_TRANSFORM_FEEDBACK); - PUSH_DATA (push, NV50_3D_COUNTER_RESET_GENERATED_PRIMITIVES); + nv50_query_get(push, q, 0x20, 0x05805002); + nv50_query_get(push, q, 0x30, 0x06805002); break; case PIPE_QUERY_TIMESTAMP_DISJOINT: case PIPE_QUERY_TIME_ELAPSED: @@ -227,6 +222,9 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) case PIPE_QUERY_GPU_FINISHED: nv50_query_get(push, q, 0, 0x1000f010); break; + case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); + break; default: assert(0); break; @@ -247,6 +245,7 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, struct nv50_context *nv50 = nv50_context(pipe); struct nv50_query *q = nv50_query(pq); uint64_t *res64 = (uint64_t *)result; + uint32_t *res32 = (uint32_t *)result; boolean *res8 = (boolean *)result; uint64_t *data64 = (uint64_t *)q->data; @@ -275,11 +274,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, break; case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ - res64[0] = data64[0]; + res64[0] = data64[0] - data64[2]; break; case PIPE_QUERY_SO_STATISTICS: - res64[0] = data64[0]; - res64[1] = data64[1]; + res64[0] = data64[0] - data64[4]; + res64[1] = data64[2] - data64[6]; break; case PIPE_QUERY_TIMESTAMP_DISJOINT: /* u32 sequence, u32 0, u64 time */ res64[0] = 1000000000; @@ -288,6 +287,9 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, case PIPE_QUERY_TIME_ELAPSED: res64[0] = data64[1] - data64[3]; break; + case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: + res32[0] = q->data[1]; + break; default: return FALSE; } @@ -295,6 +297,21 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, return TRUE; } +void +nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq) +{ + struct nv50_query *q = nv50_query(pq); + unsigned offset = q->offset; + + PUSH_SPACE(push, 5); + PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); + PUSH_DATAh(push, q->bo->offset + offset); + PUSH_DATA (push, q->bo->offset + offset); + PUSH_DATA (push, q->sequence); + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); +} + static void nv50_render_condition(struct pipe_context *pipe, struct pipe_query *pq, uint mode) @@ -325,6 +342,38 @@ nv50_render_condition(struct pipe_context *pipe, } void +nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, + struct pipe_query *pq, unsigned result_offset) +{ + struct nv50_query *q = nv50_query(pq); + + /* XXX: does this exist ? */ +#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8)) + + nouveau_pushbuf_space(push, 0, 0, 1); + nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 | + NV50_IB_ENTRY_1_NO_PREFETCH); +} + +void +nva0_so_target_save_offset(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg, + unsigned index, boolean serialize) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + + if (serialize) { + struct nouveau_pushbuf *push = nv50_context(pipe)->base.pushbuf; + PUSH_SPACE(push, 2); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + } + + nv50_query(targ->pq)->index = index; + nv50_query_end(pipe, targ->pq); +} + +void nv50_init_query_functions(struct nv50_context *nv50) { struct pipe_context *pipe = &nv50->base.pipe; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index a6dfbedf299..c96e028b2a2 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -73,6 +73,8 @@ nv50_screen_is_format_supported(struct pipe_screen *pscreen, static int nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) { + const uint16_t class_3d = nouveau_screen(pscreen)->class_3d; + switch (param) { case PIPE_CAP_MAX_COMBINED_SAMPLERS: return 64; @@ -82,8 +84,8 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 12; case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: return 14; - case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: /* shader support missing */ - return 0; + case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS: + return 512; case PIPE_CAP_MIN_TEXEL_OFFSET: return -8; case PIPE_CAP_MAX_TEXEL_OFFSET: @@ -95,7 +97,6 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_ANISOTROPIC_FILTER: case PIPE_CAP_SCALED_RESOLVE: return 1; - case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: case PIPE_CAP_SEAMLESS_CUBE_MAP: return nv50_screen(pscreen)->tesla->oclass >= NVA0_3D_CLASS; case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: @@ -121,11 +122,12 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_OCCLUSION_QUERY: return 1; case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS: - return 0; + return 4; case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS: - return 128; case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS: - return 32; + return 64; + case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME: + return (class_3d >= NVA0_3D_CLASS) ? 1 : 0; case PIPE_CAP_BLEND_EQUATION_SEPARATE: case PIPE_CAP_INDEP_BLEND_ENABLE: return 1; diff --git a/src/gallium/drivers/nv50/nv50_shader_state.c b/src/gallium/drivers/nv50/nv50_shader_state.c index aef3f129c81..d070f07bbbc 100644 --- a/src/gallium/drivers/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nv50/nv50_shader_state.c @@ -207,6 +207,8 @@ nv50_gmtyprog_validate(struct nv50_context *nv50) PUSH_DATA (push, gp->gp.vert_count); BEGIN_NV04(push, NV50_3D(GP_START_ID), 1); PUSH_DATA (push, gp->code_base); + + nv50->state.prim_size = gp->gp.prim_type; /* enum matches vertex count */ } nv50_program_update_context_state(nv50, gp, 2); @@ -278,6 +280,12 @@ nv50_validate_derived_rs(struct nv50_context *nv50) nv50_sprite_coords_validate(nv50); + if (nv50->state.rasterizer_discard != nv50->rast->pipe.rasterizer_discard) { + nv50->state.rasterizer_discard = nv50->rast->pipe.rasterizer_discard; + BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, !nv50->rast->pipe.rasterizer_discard); + } + if (nv50->dirty & NV50_NEW_FRAGPROG) return; psize = nv50->state.semantic_psize & ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK; @@ -343,6 +351,7 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) uint32_t colors = fp->fp.colors; uint32_t lin[4]; uint8_t map[64]; + uint8_t so_map[64]; if (!(nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_FRAGPROG | @@ -411,6 +420,30 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) if (nv50->rast->pipe.clamp_vertex_color) colors |= NV50_3D_SEMANTIC_COLOR_CLMP_EN; + if (unlikely(vp->so)) { + /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP + * gets written. + * + * TODO: + * Inverting vp->so->map (output -> offset) would probably speed this up. + */ + memset(so_map, 0, sizeof(so_map)); + for (i = 0; i < vp->so->map_size; ++i) { + if (vp->so->map[i] == 0xff) + continue; + for (c = 0; c < m; ++c) + if (map[c] == vp->so->map[i] && !so_map[c]) + break; + if (c == m) { + c = m; + map[m++] = vp->so->map[i]; + } + so_map[c] = 0x80 | i; + } + for (c = m; c & 3; ++c) + so_map[c] = 0; + } + n = (m + 3) / 4; assert(m <= 64); @@ -451,6 +484,11 @@ nv50_fp_linkage_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(GP_ENABLE), 1); PUSH_DATA (push, nv50->gmtyprog ? 1 : 0); + + if (vp->so) { + BEGIN_NV04(push, NV50_3D(STRMOUT_MAP(0)), n); + PUSH_DATAp(push, so_map, n); + } } static int @@ -509,3 +547,75 @@ nv50_gp_linkage_validate(struct nv50_context *nv50) BEGIN_NV04(push, NV50_3D(VP_RESULT_MAP(0)), n); PUSH_DATAp(push, map, n); } + +void +nv50_stream_output_validate(struct nv50_context *nv50) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_stream_output_state *so; + uint32_t ctrl; + unsigned i; + unsigned prims = ~0; + + so = nv50->gmtyprog ? nv50->gmtyprog->so : nv50->vertprog->so; + + if (!so || !nv50->num_so_targets) { + BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1); + PUSH_DATA (push, 0); + if (nv50->screen->base.class_3d < NVA0_3D_CLASS) { + BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); + PUSH_DATA (push, 0); + } + BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1); + PUSH_DATA (push, 1); + return; + } + + ctrl = so->ctrl; + if (nv50->screen->base.class_3d >= NVA0_3D_CLASS) + ctrl |= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET; + + BEGIN_NV04(push, NV50_3D(STRMOUT_BUFFERS_CTRL), 1); + PUSH_DATA (push, ctrl); + + nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_SO); + + for (i = 0; i < nv50->num_so_targets; ++i) { + struct nv50_so_target *targ = nv50_so_target(nv50->so_target[i]); + struct nv04_resource *buf = nv04_resource(targ->pipe.buffer); + + const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : 3; + + if (n == 4 && !targ->clean) + nv84_query_fifo_wait(push, targ->pq); + BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); + PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); + PUSH_DATA (push, so->num_attribs[i]); + if (n == 4) { + PUSH_DATA(push, targ->pipe.buffer_size); + + BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); + if (!targ->clean) { + assert(targ->pq); + nv50_query_pushbuf_submit(push, targ->pq, 0x4); + } else { + PUSH_DATA(push, 0); + targ->clean = FALSE; + } + } else { + const unsigned limit = targ->pipe.buffer_size / + (so->stride[i] * nv50->state.prim_size); + prims = MIN2(prims, limit); + } + BCTX_REFN(nv50->bufctx_3d, SO, buf, WR); + } + if (prims != ~0) { + BEGIN_NV04(push, NV50_3D(STRMOUT_PRIMITIVE_LIMIT), 1); + PUSH_DATA (push, prims); + } + BEGIN_NV04(push, NV50_3D(STRMOUT_PARAMS_LATCH), 1); + PUSH_DATA (push, 1); + BEGIN_NV04(push, NV50_3D(STRMOUT_ENABLE), 1); + PUSH_DATA (push, 1); +} diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c index a17540a1492..7f840e2b42e 100644 --- a/src/gallium/drivers/nv50/nv50_state.c +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -680,6 +680,9 @@ nv50_sp_state_create(struct pipe_context *pipe, prog->type = type; prog->pipe.tokens = tgsi_dup_tokens(cso->tokens); + if (cso->stream_output.num_outputs) + prog->pipe.stream_output = cso->stream_output; + return (void *)prog; } @@ -920,6 +923,90 @@ nv50_vertex_state_bind(struct pipe_context *pipe, void *hwcso) nv50->dirty |= NV50_NEW_VERTEX; } +static struct pipe_stream_output_target * +nv50_so_target_create(struct pipe_context *pipe, + struct pipe_resource *res, + unsigned offset, unsigned size) +{ + struct nv50_so_target *targ = MALLOC_STRUCT(nv50_so_target); + if (!targ) + return NULL; + + if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { + targ->pq = pipe->create_query(pipe, + NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET); + if (!targ->pq) { + FREE(targ); + return NULL; + } + } else { + targ->pq = NULL; + } + targ->clean = TRUE; + + targ->pipe.buffer_size = size; + targ->pipe.buffer_offset = offset; + targ->pipe.context = pipe; + targ->pipe.buffer = NULL; + pipe_resource_reference(&targ->pipe.buffer, res); + pipe_reference_init(&targ->pipe.reference, 1); + + return &targ->pipe; +} + +static void +nv50_so_target_destroy(struct pipe_context *pipe, + struct pipe_stream_output_target *ptarg) +{ + struct nv50_so_target *targ = nv50_so_target(ptarg); + if (targ->pq) + pipe->destroy_query(pipe, targ->pq); + FREE(targ); +} + +static void +nv50_set_stream_output_targets(struct pipe_context *pipe, + unsigned num_targets, + struct pipe_stream_output_target **targets, + unsigned append_mask) +{ + struct nv50_context *nv50 = nv50_context(pipe); + unsigned i; + boolean serialize = TRUE; + const boolean can_resume = nv50->screen->base.class_3d >= NVA0_3D_CLASS; + + assert(num_targets <= 4); + + for (i = 0; i < num_targets; ++i) { + const boolean changed = nv50->so_target[i] != targets[i]; + if (!changed && (append_mask & (1 << i))) + continue; + nv50->so_targets_dirty |= 1 << i; + + if (can_resume && changed && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + + if (targets[i] && !(append_mask & (1 << i))) + nv50_so_target(targets[i])->clean = TRUE; + + pipe_so_target_reference(&nv50->so_target[i], targets[i]); + } + for (; i < nv50->num_so_targets; ++i) { + if (can_resume && nv50->so_target[i]) { + nva0_so_target_save_offset(pipe, nv50->so_target[i], i, serialize); + serialize = FALSE; + } + pipe_so_target_reference(&nv50->so_target[i], NULL); + nv50->so_targets_dirty |= 1 << i; + } + nv50->num_so_targets = num_targets; + + if (nv50->so_targets_dirty) + nv50->dirty |= NV50_NEW_STRMOUT; +} + void nv50_init_state_functions(struct nv50_context *nv50) { @@ -975,5 +1062,8 @@ nv50_init_state_functions(struct nv50_context *nv50) pipe->set_vertex_buffers = nv50_set_vertex_buffers; pipe->set_index_buffer = nv50_set_index_buffer; -} + pipe->create_stream_output_target = nv50_so_target_create; + pipe->stream_output_target_destroy = nv50_so_target_destroy; + pipe->set_stream_output_targets = nv50_set_stream_output_targets; +} diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index c19acf6c426..a95e96d3c51 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -360,6 +360,8 @@ static struct state_validate { { nv50_constbufs_validate, NV50_NEW_CONSTBUF }, { nv50_validate_textures, NV50_NEW_TEXTURES }, { nv50_validate_samplers, NV50_NEW_SAMPLERS }, + { nv50_stream_output_validate, NV50_NEW_STRMOUT | + NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG }, { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS } }; #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0])) diff --git a/src/gallium/drivers/nv50/nv50_stateobj.h b/src/gallium/drivers/nv50/nv50_stateobj.h index 188406da600..8a9260c937e 100644 --- a/src/gallium/drivers/nv50/nv50_stateobj.h +++ b/src/gallium/drivers/nv50/nv50_stateobj.h @@ -51,4 +51,17 @@ struct nv50_vertex_stateobj { struct nv50_vertex_element element[0]; }; +struct nv50_so_target { + struct pipe_stream_output_target pipe; + struct pipe_query *pq; + unsigned stride; + boolean clean; +}; + +static INLINE struct nv50_so_target * +nv50_so_target(struct pipe_stream_output_target *ptarg) +{ + return (struct nv50_so_target *)ptarg; +} + #endif diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index b38e49ffcc1..15c88d5316d 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -711,7 +711,7 @@ nv50_blit_set_src(struct nv50_context *nv50, templ.swizzle_a = PIPE_SWIZZLE_ALPHA; nv50->textures[2][0] = nv50_create_sampler_view(pipe, res, &templ); - nv50->textures[2][0] = NULL; + nv50->textures[2][1] = NULL; nv50_blit_fixup_tic_entry(nv50->textures[2][0]); diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index bc01e69decf..323677eaf80 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -405,6 +405,25 @@ nv50_prim_gl(unsigned prim) } } +/* For pre-nva0 transform feedback. */ +static const uint8_t nv50_pipe_prim_to_prim_size[PIPE_PRIM_MAX + 1] = +{ + [PIPE_PRIM_POINTS] = 1, + [PIPE_PRIM_LINES] = 2, + [PIPE_PRIM_LINE_LOOP] = 2, + [PIPE_PRIM_LINE_STRIP] = 2, + [PIPE_PRIM_TRIANGLES] = 3, + [PIPE_PRIM_TRIANGLE_STRIP] = 3, + [PIPE_PRIM_TRIANGLE_FAN] = 3, + [PIPE_PRIM_QUADS] = 3, + [PIPE_PRIM_QUAD_STRIP] = 3, + [PIPE_PRIM_POLYGON] = 3, + [PIPE_PRIM_LINES_ADJACENCY] = 2, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = 2, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = 3, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = 3 +}; + static void nv50_draw_arrays(struct nv50_context *nv50, unsigned mode, unsigned start, unsigned count, @@ -624,6 +643,51 @@ nv50_draw_elements(struct nv50_context *nv50, boolean shorten, } static void +nva0_draw_stream_output(struct nv50_context *nv50, + const struct pipe_draw_info *info) +{ + struct nouveau_pushbuf *push = nv50->base.pushbuf; + struct nv50_so_target *so = nv50_so_target(info->count_from_stream_output); + struct nv04_resource *res = nv04_resource(so->pipe.buffer); + unsigned num_instances = info->instance_count; + unsigned mode = nv50_prim_gl(info->mode); + + if (unlikely(nv50->screen->base.class_3d < NVA0_3D_CLASS)) { + /* A proper implementation without waiting doesn't seem possible, + * so don't bother. + */ + NOUVEAU_ERR("draw_stream_output not supported on pre-NVA0 cards\n"); + return; + } + + if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) { + res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING; + PUSH_SPACE(push, 4); + BEGIN_NV04(push, SUBC_3D(NV50_GRAPH_SERIALIZE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1); + PUSH_DATA (push, 0); + } + + assert(num_instances); + do { + PUSH_SPACE(push, 8); + BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1); + PUSH_DATA (push, mode); + BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); + PUSH_DATA (push, 0); + BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1); + nv50_query_pushbuf_submit(push, so->pq, 0x4); + BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); + PUSH_DATA (push, 0); + + mode |= NV50_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT; + } while (--num_instances); +} + +static void nv50_draw_vbo_kick_notify(struct nouveau_pushbuf *chan) { struct nv50_screen *screen = chan->user_priv; @@ -655,6 +719,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) if (nv50->vbo_user && !(nv50->dirty & (NV50_NEW_VERTEX | NV50_NEW_ARRAYS))) nv50_update_user_vbufs(nv50); + if (unlikely(nv50->num_so_targets && !nv50->gmtyprog)) + nv50->state.prim_size = nv50_pipe_prim_to_prim_size[info->mode]; + nv50_state_validate(nv50, ~0, 8); /* 8 as minimum, we use flush_notify */ push->kick_notify = nv50_draw_vbo_kick_notify; @@ -679,11 +746,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50->base.vbo_dirty = FALSE; } - if (!info->indexed) { - nv50_draw_arrays(nv50, - info->mode, info->start, info->count, - info->instance_count); - } else { + if (info->indexed) { boolean shorten = info->max_index <= 65535; assert(nv50->idxbuf.buffer); @@ -713,6 +776,13 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) nv50_draw_elements(nv50, shorten, info->mode, info->start, info->count, info->instance_count, info->index_bias); + } else + if (unlikely(info->count_from_stream_output)) { + nva0_draw_stream_output(nv50, info); + } else { + nv50_draw_arrays(nv50, + info->mode, info->start, info->count, + info->instance_count); } push->kick_notify = nv50_default_kick_notify; diff --git a/src/gallium/drivers/nv50/nv50_winsys.h b/src/gallium/drivers/nv50/nv50_winsys.h index b36898dabe6..145ee70cb9f 100644 --- a/src/gallium/drivers/nv50/nv50_winsys.h +++ b/src/gallium/drivers/nv50/nv50_winsys.h @@ -49,6 +49,7 @@ PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags) #define SUBC_3D(m) 3, (m) #define NV50_3D(n) SUBC_3D(NV50_3D_##n) +#define NVA0_3D(n) SUBC_3D(NVA0_3D_##n) #define SUBC_2D(m) 4, (m) #define NV50_2D(n) SUBC_2D(NV50_2D_##n) diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp index 2ca4979dc74..fbd1aa5dfc9 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_emit_nvc0.cpp @@ -1020,7 +1020,7 @@ CodeEmitterNVC0::emitTEX(const TexInstruction *i) code[1] |= 0x02000000; } - if (i->tex.derivAll) + if (i->op != OP_TXD && i->tex.derivAll) code[1] |= 1 << 13; defId(i->def(0), 14); diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp index 02ae9fd5d0e..900e998df8d 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_lowering_nvc0.cpp @@ -749,21 +749,22 @@ bool NVC0LoweringPass::handleTXD(TexInstruction *txd) { int dim = txd->tex.target.getDim(); - int arg = txd->tex.target.getDim() + txd->tex.target.isArray(); + int arg = txd->tex.target.getArgCount(); handleTEX(txd); - while (txd->src(arg).exists()) + while (txd->srcExists(arg)) ++arg; txd->tex.derivAll = true; - if (dim > 2 || txd->tex.target.isShadow()) + if (dim > 2 || + txd->tex.target.isCube() || + arg > 4 || + txd->tex.target.isShadow()) return handleManualTXD(txd); - assert(arg <= 4); // at most s/t/array, x, y, offset - for (int c = 0; c < dim; ++c) { - txd->src(arg + c * 2 + 0).set(txd->dPdx[c]); - txd->src(arg + c * 2 + 1).set(txd->dPdy[c]); + txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); + txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); txd->dPdx[c].set(NULL); txd->dPdy[c].set(NULL); } diff --git a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp index 10c2d09d657..e4b9dc18311 100644 --- a/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp +++ b/src/gallium/drivers/nvc0/codegen/nv50_ir_target_nvc0.cpp @@ -223,6 +223,9 @@ static const struct opProperties _initProps[] = { OP_ABS, 0x0, 0x0, 0x0, 0x0, 0x1, 0x0 }, { OP_NEG, 0x0, 0x1, 0x0, 0x0, 0x1, 0x0 }, { OP_CVT, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_CEIL, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_FLOOR, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, + { OP_TRUNC, 0x1, 0x1, 0x0, 0x8, 0x1, 0x0 }, { OP_AND, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, { OP_OR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, { OP_XOR, 0x0, 0x0, 0x3, 0x0, 0x2, 0x2 | 0x8 }, diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c index 69d67585d8b..b9e73dd514b 100644 --- a/src/gallium/drivers/r300/r300_vs_draw.c +++ b/src/gallium/drivers/r300/r300_vs_draw.c @@ -94,11 +94,12 @@ static void emit_output(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Interpolate = interp; + decl.Declaration.Interpolate = 1; decl.Declaration.Semantic = TRUE; decl.Semantic.Name = name; decl.Semantic.Index = index; decl.Range.First = decl.Range.Last = reg; + decl.Interp.Interpolate = interp; ctx->emit_declaration(ctx, &decl); ++vsctx->num_outputs; } diff --git a/src/gallium/drivers/r600/Makefile.am b/src/gallium/drivers/r600/Makefile.am index 3089a829e53..77d2674d262 100644 --- a/src/gallium/drivers/r600/Makefile.am +++ b/src/gallium/drivers/r600/Makefile.am @@ -29,7 +29,7 @@ libr600_a_SOURCES += \ $(LLVM_C_SOURCES) libr600_a_LIBADD = \ - $(top_srcdir)/src/gallium/drivers/radeon/libradeon.a + $(top_builddir)/src/gallium/drivers/radeon/libradeon.a AM_CFLAGS += \ $(LLVM_CFLAGS) \ diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index b6d03ef37de..d2c1679796a 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -133,6 +133,10 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) S_SQ_CF_WORD1_COND(cf->cond) | S_SQ_CF_WORD1_POP_COUNT(cf->pop_count); break; + case CF_NATIVE: + bc->bytecode[id++] = cf->isa[0]; + bc->bytecode[id++] = cf->isa[1]; + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index 517121dc288..81aedb5c0ac 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -796,11 +796,11 @@ static void *evergreen_create_dsa_state(struct pipe_context *ctx, alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } + dsa->sx_alpha_test_control = alpha_test_control & 0xff; dsa->alpha_ref = alpha_ref; /* misc */ db_render_control = 0; - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control); r600_pipe_state_add_reg(rstate, R_028000_DB_RENDER_CONTROL, db_render_control); return rstate; @@ -1428,6 +1428,11 @@ static void evergreen_cb(struct r600_context *rctx, struct r600_pipe_state *rsta blend_bypass = 1; } + if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) + rctx->sx_alpha_test_control |= S_028410_ALPHA_TEST_BYPASS(1); + else + rctx->sx_alpha_test_control &= C_028410_ALPHA_TEST_BYPASS; + color_info |= S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | diff --git a/src/gallium/drivers/r600/evergreend.h b/src/gallium/drivers/r600/evergreend.h index 4009e91d4fc..105d80f061d 100644 --- a/src/gallium/drivers/r600/evergreend.h +++ b/src/gallium/drivers/r600/evergreend.h @@ -32,20 +32,16 @@ #define EVERGREEN_CONTEXT_REG_OFFSET 0X00028000 #define EVERGREEN_CONTEXT_REG_END 0X00029000 #define EVERGREEN_RESOURCE_OFFSET 0x00030000 -#define EVERGREEN_RESOURCE_END 0x00034000 -#define CAYMAN_RESOURCE_END 0x00038000 +#define EVERGREEN_RESOURCE_END 0x00038000 #define EVERGREEN_LOOP_CONST_OFFSET 0x0003A200 -#define EVERGREEN_LOOP_CONST_END 0x0003A26C +#define EVERGREEN_LOOP_CONST_END 0x0003A500 #define EVERGREEN_BOOL_CONST_OFFSET 0x0003A500 -#define EVERGREEN_BOOL_CONST_END 0x0003A506 -#define CAYMAN_BOOL_CONST_END 0x0003A518 +#define EVERGREEN_BOOL_CONST_END 0x0003A518 #define EVERGREEN_SAMPLER_OFFSET 0X0003C000 -#define EVERGREEN_SAMPLER_END 0X0003CFF0 -#define CAYMAN_SAMPLER_END 0X0003C600 +#define EVERGREEN_SAMPLER_END 0X0003C600 #define EVERGREEN_CTL_CONST_OFFSET 0x0003CFF0 -#define EVERGREEN_CTL_CONST_END 0x0003E200 -#define CAYMAN_CTL_CONST_END 0x0003FF0C +#define EVERGREEN_CTL_CONST_END 0x0003FF0C #define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10 #define EVENT_TYPE_ZPASS_DONE 0x15 diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 651933bf37c..5a10bd90776 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -94,6 +94,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_GPR_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: @@ -153,6 +154,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE_UINT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE: @@ -171,6 +173,7 @@ static inline unsigned int r600_bytecode_get_num_operands(struct r600_bytecode * case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT: + case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC: case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE: @@ -1927,6 +1930,7 @@ int r600_bytecode_build(struct r600_bytecode *bc) case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS: case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: + case CF_NATIVE: break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); @@ -2025,13 +2029,12 @@ int r600_bytecode_build(struct r600_bytecode *bc) } break; case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX: - if (bc->chip_class == CAYMAN) { - LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { - r = r600_bytecode_vtx_build(bc, vtx, addr); - if (r) - return r; - addr += 4; - } + LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) { + assert(bc->chip_class >= EVERGREEN); + r = r600_bytecode_vtx_build(bc, vtx, addr); + if (r) + return r; + addr += 4; } LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) { r = r600_bytecode_tex_build(bc, tex, addr); @@ -2069,6 +2072,8 @@ int r600_bytecode_build(struct r600_bytecode *bc) case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN: case CM_V_SQ_CF_WORD1_SQ_CF_INST_END: break; + case CF_NATIVE: + break; default: R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst); return -EINVAL; @@ -2341,6 +2346,10 @@ void r600_bytecode_dump(struct r600_bytecode *bc) fprintf(stderr, "COND:%X ", cf->cond); fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count); break; + case CF_NATIVE: + fprintf(stderr, "%04d %08X CF NATIVE\n", id, bc->bytecode[id]); + fprintf(stderr, "%04d %08X CF NATIVE\n", id + 1, bc->bytecode[id + 1]); + break; default: R600_ERR("Unknown instruction %0x\n", cf->inst); } @@ -2477,7 +2486,8 @@ void r600_bytecode_dump(struct r600_bytecode *bc) if (alu->last) { for (i = 0; i < nliteral; i++, id++) { float *f = (float*)(bc->bytecode + id); - fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f); + fprintf(stderr, "%04d %08X\t%f (%d)\n", id, bc->bytecode[id], *f, + *(bc->bytecode + id)); } id += nliteral & 1; nliteral = 0; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 5790ead991f..a8a157b79e4 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -135,6 +135,14 @@ struct r600_bytecode_kcache { unsigned addr; }; +/* A value of CF_NATIVE in r600_bytecode_cf::inst means that this instruction + * has already been encoded, and the encoding has been stored in + * r600_bytecode::isa. This is used by the LLVM backend to emit CF instructions + * e.g. RAT_WRITE_* that can't be properly represented by struct + * r600_bytecode_cf. + */ +#define CF_NATIVE ~0 + struct r600_bytecode_cf { struct list_head list; @@ -157,6 +165,7 @@ struct r600_bytecode_cf { struct r600_bytecode_alu *curr_bs_head; struct r600_bytecode_alu *prev_bs_head; struct r600_bytecode_alu *prev2_bs_head; + unsigned isa[2]; }; #define FC_NONE 0 diff --git a/src/gallium/drivers/r600/r600_llvm.c b/src/gallium/drivers/r600/r600_llvm.c index d467baf60fb..f916604db7b 100644 --- a/src/gallium/drivers/r600/r600_llvm.c +++ b/src/gallium/drivers/r600/r600_llvm.c @@ -21,10 +21,44 @@ static LLVMValueRef llvm_fetch_const( enum tgsi_opcode_type type, unsigned swizzle) { - return lp_build_intrinsic_unary(bld_base->base.gallivm->builder, + LLVMValueRef cval = lp_build_intrinsic_unary(bld_base->base.gallivm->builder, "llvm.AMDGPU.load.const", bld_base->base.elem_type, lp_build_const_int32(bld_base->base.gallivm, radeon_llvm_reg_index_soa(reg->Register.Index, swizzle))); + + return bitcast(bld_base, type, cval); +} + +static void llvm_load_system_value( + struct radeon_llvm_context * ctx, + unsigned index, + const struct tgsi_full_declaration *decl) +{ + unsigned chan; + + switch (decl->Semantic.Name) { + case TGSI_SEMANTIC_INSTANCEID: chan = 3; break; + case TGSI_SEMANTIC_VERTEXID: chan = 0; break; + default: assert(!"unknown system value"); + } + + LLVMValueRef reg = lp_build_const_int32( + ctx->soa.bld_base.base.gallivm, chan); + ctx->system_values[index] = lp_build_intrinsic_unary( + ctx->soa.bld_base.base.gallivm->builder, + "llvm.R600.load.input", + ctx->soa.bld_base.base.elem_type, reg); +} + +static LLVMValueRef llvm_fetch_system_value( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register *reg, + enum tgsi_opcode_type type, + unsigned swizzle) +{ + struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); + LLVMValueRef cval = ctx->system_values[reg->Register.Index]; + return bitcast(bld_base, type, cval); } static void llvm_load_input( @@ -59,17 +93,13 @@ static void llvm_emit_prologue(struct lp_build_tgsi_context * bld_base) for (i = 0; i < ctx->reserved_reg_count; i++) { unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - LLVMValueRef reg; LLVMValueRef reg_index = lp_build_const_int32( base->gallivm, radeon_llvm_reg_index_soa(i, chan)); - reg = lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.reserve.reg", - base->elem_type, reg_index); lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.export.reg", + "llvm.AMDGPU.reserve.reg", LLVMVoidTypeInContext(base->gallivm->context), - reg); + reg_index); } } } @@ -85,7 +115,6 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) unsigned chan; for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { LLVMValueRef output; - LLVMValueRef store_output; unsigned adjusted_reg_idx = i + ctx->reserved_reg_count; LLVMValueRef reg_index = lp_build_const_int32( @@ -95,16 +124,11 @@ static void llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) output = LLVMBuildLoad(base->gallivm->builder, ctx->soa.outputs[i][chan], ""); - store_output = lp_build_intrinsic_binary( + lp_build_intrinsic_binary( base->gallivm->builder, "llvm.AMDGPU.store.output", - base->elem_type, - output, reg_index); - - lp_build_intrinsic_unary(base->gallivm->builder, - "llvm.AMDGPU.export.reg", LLVMVoidTypeInContext(base->gallivm->context), - store_output); + output, reg_index); } } } @@ -169,28 +193,7 @@ static struct lp_build_tgsi_action dot_action = { .intr_name = "llvm.AMDGPU.dp4" }; -static void txp_fetch_args( - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMValueRef src_w; - unsigned chan; - LLVMValueRef coords[4]; - - emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); - src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); - for (chan = 0; chan < 3; chan++ ) { - LLVMValueRef arg = lp_build_emit_fetch(bld_base, - emit_data->inst, 0, chan); - coords[chan] = lp_build_emit_llvm_binary(bld_base, - TGSI_OPCODE_DIV, arg, src_w); - } - coords[3] = bld_base->base.one; - emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, - coords, 4); - emit_data->arg_count = 1; -} LLVMModuleRef r600_tgsi_llvm( struct radeon_llvm_context * ctx, @@ -204,20 +207,25 @@ LLVMModuleRef r600_tgsi_llvm( bld_base->info = &shader_info; bld_base->userdata = ctx; bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = llvm_fetch_const; + bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = llvm_fetch_system_value; bld_base->emit_prologue = llvm_emit_prologue; bld_base->emit_epilogue = llvm_emit_epilogue; ctx->userdata = ctx; ctx->load_input = llvm_load_input; + ctx->load_system_value = llvm_load_system_value; bld_base->op_actions[TGSI_OPCODE_DP2] = dot_action; bld_base->op_actions[TGSI_OPCODE_DP3] = dot_action; bld_base->op_actions[TGSI_OPCODE_DP4] = dot_action; bld_base->op_actions[TGSI_OPCODE_DPH] = dot_action; + bld_base->op_actions[TGSI_OPCODE_DDX].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_DDY].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TEX].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXB].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXD].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXL].emit = llvm_emit_tex; - bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXF].emit = llvm_emit_tex; + bld_base->op_actions[TGSI_OPCODE_TXQ].emit = llvm_emit_tex; bld_base->op_actions[TGSI_OPCODE_TXP].emit = llvm_emit_tex; lp_build_tgsi_llvm(bld_base, tokens); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 63fc27564d7..db455f021ad 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -161,6 +161,7 @@ struct r600_pipe_dsa { ubyte valuemask[2]; ubyte writemask[2]; bool is_flush; + unsigned sx_alpha_test_control; }; struct r600_vertex_element @@ -250,6 +251,7 @@ struct r600_context { struct pipe_framebuffer_state framebuffer; unsigned cb_target_mask; unsigned fb_cb_shader_mask; + unsigned sx_alpha_test_control; unsigned cb_shader_mask; unsigned cb_color_control; unsigned pa_sc_line_stipple; diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 5e22b35ba48..cd78104a010 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -287,6 +287,7 @@ static void llvm_if(struct r600_shader_ctx *ctx, struct r600_bytecode_alu * alu, { alu->inst = pred_inst; alu->predicate = 1; + alu->dst.write = 0; alu->src[1].sel = V_SQ_ALU_SRC_0; alu->src[1].chan = 0; alu->last = 1; @@ -362,6 +363,10 @@ static unsigned r600_fc_from_byte_stream(struct r600_shader_ctx *ctx, tgsi_loop_brk_cont(ctx); } break; + case 8: + r600_break_from_byte_stream(ctx, &alu, + CTX_INST(V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT)); + break; } return bytes_read; @@ -401,10 +406,43 @@ static unsigned r600_tex_from_byte_stream(struct r600_shader_ctx *ctx, return bytes_read; } +static int r600_vtx_from_byte_stream(struct r600_shader_ctx *ctx, + unsigned char * bytes, unsigned bytes_read) +{ + struct r600_bytecode_vtx vtx; + memset(&vtx, 0, sizeof(vtx)); + vtx.inst = bytes[bytes_read++]; + vtx.fetch_type = bytes[bytes_read++]; + vtx.buffer_id = bytes[bytes_read++]; + vtx.src_gpr = bytes[bytes_read++]; + vtx.src_sel_x = bytes[bytes_read++]; + vtx.mega_fetch_count = bytes[bytes_read++]; + vtx.dst_gpr = bytes[bytes_read++]; + vtx.dst_sel_x = bytes[bytes_read++]; + vtx.dst_sel_y = bytes[bytes_read++]; + vtx.dst_sel_z = bytes[bytes_read++]; + vtx.dst_sel_w = bytes[bytes_read++]; + vtx.use_const_fields = bytes[bytes_read++]; + vtx.data_format = bytes[bytes_read++]; + vtx.num_format_all = bytes[bytes_read++]; + vtx.format_comp_all = bytes[bytes_read++]; + vtx.srf_mode_all = bytes[bytes_read++]; + vtx.offset = bytes[bytes_read++]; + vtx.endian = bytes[bytes_read++]; + + if (r600_bytecode_add_vtx(ctx->bc, &vtx)) { + fprintf(stderr, "Error adding vtx\n"); + } + /* Use the Texture Cache */ + ctx->bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX; + return bytes_read; +} + static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, unsigned char * bytes, unsigned num_bytes) { unsigned bytes_read = 0; + unsigned i, byte; while (bytes_read < num_bytes) { char inst_type = bytes[bytes_read++]; switch (inst_type) { @@ -420,6 +458,20 @@ static void r600_bytecode_from_byte_stream(struct r600_shader_ctx *ctx, bytes_read = r600_fc_from_byte_stream(ctx, bytes, bytes_read); break; + case 3: + r600_bytecode_add_cfinst(ctx->bc, CF_NATIVE); + for (i = 0; i < 2; i++) { + for (byte = 0 ; byte < 4; byte++) { + ctx->bc->cf_last->isa[i] |= + (bytes[bytes_read++] << (byte * 8)); + } + } + break; + + case 4: + bytes_read = r600_vtx_from_byte_stream(ctx, bytes, + bytes_read); + break; default: /* XXX: Error here */ break; @@ -670,8 +722,8 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->input[i].name = d->Semantic.Name; ctx->shader->input[i].sid = d->Semantic.Index; ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]); - ctx->shader->input[i].interpolate = d->Declaration.Interpolate; - ctx->shader->input[i].centroid = d->Declaration.Centroid; + ctx->shader->input[i].interpolate = d->Interp.Interpolate; + ctx->shader->input[i].centroid = d->Interp.Centroid; ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First; if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { switch (ctx->shader->input[i].name) { @@ -697,7 +749,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) ctx->shader->output[i].sid = d->Semantic.Index; ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]); ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First; - ctx->shader->output[i].interpolate = d->Declaration.Interpolate; + ctx->shader->output[i].interpolate = d->Interp.Interpolate; ctx->shader->output[i].write_mask = d->Declaration.UsageMask; if (ctx->type == TGSI_PROCESSOR_VERTEX) { switch (d->Semantic.Name) { @@ -5102,7 +5154,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {80, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PUSHA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POPA, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, {TGSI_OPCODE_I2F, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, {TGSI_OPCODE_NOT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, @@ -5168,16 +5220,16 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_UARL, 0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_r600_arl}, @@ -5276,7 +5328,7 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2_trans}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, @@ -5342,16 +5394,16 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, @@ -5450,7 +5502,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {80, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_PUSHA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_POPA, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, + {TGSI_OPCODE_CEIL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CEIL, tgsi_op2}, {TGSI_OPCODE_I2F, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT, tgsi_op2}, {TGSI_OPCODE_NOT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC, tgsi_op2}, @@ -5516,16 +5568,16 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_CASE, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_DEFAULT, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, {TGSI_OPCODE_ENDSWITCH, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported}, - {TGSI_OPCODE_LOAD, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_LOAD_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SAMPLE_I_MS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_B, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_C_LZ, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_D, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_L, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_GATHER4, 0, 0, tgsi_unsupported}, - {TGSI_OPCODE_RESINFO, 0, 0, tgsi_unsupported}, + {TGSI_OPCODE_SVIEWINFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_POS, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_SAMPLE_INFO, 0, 0, tgsi_unsupported}, {TGSI_OPCODE_UARL, 0, EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT, tgsi_eg_arl}, diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c index 3a83b613e58..acf59f80bf4 100644 --- a/src/gallium/drivers/r600/r600_state.c +++ b/src/gallium/drivers/r600/r600_state.c @@ -805,9 +805,9 @@ static void *r600_create_dsa_state(struct pipe_context *ctx, alpha_test_control |= S_028410_ALPHA_TEST_ENABLE(1); alpha_ref = fui(state->alpha.ref_value); } + dsa->sx_alpha_test_control = alpha_test_control & 0xff; dsa->alpha_ref = alpha_ref; - r600_pipe_state_add_reg(rstate, R_028410_SX_ALPHA_TEST_CONTROL, alpha_test_control); r600_pipe_state_add_reg(rstate, R_028800_DB_DEPTH_CONTROL, db_depth_control); return rstate; } @@ -1466,6 +1466,11 @@ static void r600_cb(struct r600_context *rctx, struct r600_pipe_state *rstate, blend_bypass = 1; } + if (ntype == V_0280A0_NUMBER_UINT || ntype == V_0280A0_NUMBER_SINT) + rctx->sx_alpha_test_control |= S_028410_ALPHA_TEST_BYPASS(1); + else + rctx->sx_alpha_test_control &= C_028410_ALPHA_TEST_BYPASS; + color_info |= S_0280A0_FORMAT(format) | S_0280A0_COMP_SWAP(swap) | S_0280A0_BLEND_BYPASS(blend_bypass) | diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index ccae7d91d43..d47383558d9 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -244,6 +244,8 @@ void r600_bind_dsa_state(struct pipe_context *ctx, void *state) return; rstate = &dsa->rstate; rctx->states[rstate->id] = rstate; + rctx->sx_alpha_test_control &= ~0xff; + rctx->sx_alpha_test_control |= dsa->sx_alpha_test_control; rctx->alpha_ref = dsa->alpha_ref; rctx->alpha_ref_dirty = true; r600_context_pipe_state_set(rctx, rstate); @@ -796,6 +798,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_pipe_state_add_reg(&rctx->vgt, R_02823C_CB_SHADER_MASK, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028408_VGT_INDX_OFFSET, info.index_bias); r600_pipe_state_add_reg(&rctx->vgt, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, info.restart_index); + r600_pipe_state_add_reg(&rctx->vgt, R_028410_SX_ALPHA_TEST_CONTROL, 0); r600_pipe_state_add_reg(&rctx->vgt, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info.primitive_restart); r600_pipe_state_add_reg(&rctx->vgt, R_03CFF4_SQ_VTX_START_INST_LOC, info.start_instance); r600_pipe_state_add_reg(&rctx->vgt, R_028A0C_PA_SC_LINE_STIPPLE, 0); @@ -817,6 +820,7 @@ void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo) r600_pipe_state_mod_reg(&rctx->vgt, rctx->cb_shader_mask); r600_pipe_state_mod_reg(&rctx->vgt, info.index_bias); r600_pipe_state_mod_reg(&rctx->vgt, info.restart_index); + r600_pipe_state_mod_reg(&rctx->vgt, rctx->sx_alpha_test_control); r600_pipe_state_mod_reg(&rctx->vgt, info.primitive_restart); r600_pipe_state_mod_reg(&rctx->vgt, info.start_instance); diff --git a/src/gallium/drivers/radeon/AMDGPU.h b/src/gallium/drivers/radeon/AMDGPU.h index eff002a5eae..0f42cb744d3 100644 --- a/src/gallium/drivers/radeon/AMDGPU.h +++ b/src/gallium/drivers/radeon/AMDGPU.h @@ -1,4 +1,4 @@ -//===-- AMDGPU.h - TODO: Add brief description -------===// +//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// #ifndef AMDGPU_H #define AMDGPU_H @@ -19,29 +15,24 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { - class FunctionPass; - class AMDGPUTargetMachine; - - FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); - FunctionPass *createR600LowerShaderInstructionsPass(TargetMachine &tm); - FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm); - - FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); - FunctionPass *createSIInitMachineFunctionInfoPass(TargetMachine &tm); - FunctionPass *createSILowerShaderInstructionsPass(TargetMachine &tm); - FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm); - FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); - FunctionPass *createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm); +class FunctionPass; +class AMDGPUTargetMachine; - FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm); - FunctionPass *createAMDGPULowerShaderInstructionsPass(TargetMachine &tm); +// R600 Passes +FunctionPass* createR600KernelParametersPass(const TargetData* TD); +FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS); +FunctionPass *createR600LowerInstructionsPass(TargetMachine &tm); - FunctionPass *createAMDGPUDelimitInstGroupsPass(TargetMachine &tm); +// SI Passes +FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm); +FunctionPass *createSIPropagateImmReadsPass(TargetMachine &tm); +FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); - FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); +// Passes common to R600 and SI +FunctionPass *createAMDGPULowerInstructionsPass(TargetMachine &tm); +FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); - FunctionPass *createAMDGPUFixRegClassesPass(TargetMachine &tm); +} // End namespace llvm -} /* End namespace llvm */ -#endif /* AMDGPU_H */ +#endif // AMDGPU_H diff --git a/src/gallium/drivers/radeon/AMDGPUConstants.pm b/src/gallium/drivers/radeon/AMDGPUConstants.pm deleted file mode 100644 index b64ff49c187..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUConstants.pm +++ /dev/null @@ -1,44 +0,0 @@ -#===-- AMDGPUConstants.pm - TODO: Add brief description -------===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===----------------------------------------------------------------------===# -# -# TODO: Add full description -# -#===----------------------------------------------------------------------===# - -package AMDGPUConstants; - -use base 'Exporter'; - -use constant CONST_REG_COUNT => 256; -use constant TEMP_REG_COUNT => 128; - -our @EXPORT = ('TEMP_REG_COUNT', 'CONST_REG_COUNT', 'get_hw_index', 'get_chan_str'); - -sub get_hw_index { - my ($index) = @_; - return int($index / 4); -} - -sub get_chan_str { - my ($index) = @_; - my $chan = $index % 4; - if ($chan == 0 ) { - return 'X'; - } elsif ($chan == 1) { - return 'Y'; - } elsif ($chan == 2) { - return 'Z'; - } elsif ($chan == 3) { - return 'W'; - } else { - die("Unknown chan value: $chan"); - } -} - -1; diff --git a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp index ce947f8ff78..8e82b8438bb 100644 --- a/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp +++ b/src/gallium/drivers/radeon/AMDGPUConvertToISA.cpp @@ -34,7 +34,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); }; -} /* End anonymous namespace */ +} // End anonymous namespace char AMDGPUConvertToISAPass::ID = 0; diff --git a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl index 1fd4fb04b3e..130eaac72bc 100644 --- a/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl +++ b/src/gallium/drivers/radeon/AMDGPUGenInstrEnums.pl @@ -1,15 +1,32 @@ -#===-- AMDGPUGenInstrEnums.pl - TODO: Add brief description -------===# +#===-- AMDGPUGenInstrEnums.pl - Script for generating instruction enums ----===# # # The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. # -#===----------------------------------------------------------------------===# +#===-----------------------------------------------------------------------===# # -# TODO: Add full description +# This perl script is used to generate the following files: # -#===----------------------------------------------------------------------===# +# 1. perl AMDGPUGenInstrEnums.pl td > AMDGPUInstrEnums.td +# +# This file contains Tablegen constants used for matching hw instructions +# from R600 and SI with functionally similar AMDIL instruction. It aslo +# contains definitions of floating point constants like pi (in hex notation) +# that are used in some of the shader patterns. +# +# 2. perl AMDGPUGenInstrEnums.pl h > AMDGPUInstrEnums.h +# +# This file contains cpp enums that match the constant values in +# AMDGPUInstrEnums.td +# +# 3. perl AMDGPUGenInstrEnums.pl inc > AMDGPUInstrEnums.include +# +# This file contains a function called GetRealAMDILOpcode which maps the +# constant values defined in AMDGPUInstrEnums.h to the corresponding AMDIL +# instructions. +#===-----------------------------------------------------------------------===# use warnings; use strict; @@ -41,7 +58,7 @@ my $FILE_TYPE = $ARGV[0]; open AMDIL, '<', 'AMDILInstructions.td'; -my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ'); +my @INST_ENUMS = ('NONE', 'FEQ', 'FGE', 'FLT', 'FNE', 'MOVE_f32', 'MOVE_i32', 'FTOI', 'ITOF', 'CMOVLOG_f32', 'UGT', 'IGE', 'INE', 'UGE', 'IEQ', 'BINARY_OR_i32', 'BINARY_NOT_i32'); while (<AMDIL>) { if ($_ =~ /defm\s+([A-Z_]+)\s+:\s+([A-Za-z0-9]+)</) { diff --git a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl b/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl deleted file mode 100644 index 60523a7b48f..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUGenShaderPatterns.pl +++ /dev/null @@ -1,30 +0,0 @@ -#===-- AMDGPUGenShaderPatterns.pl - TODO: Add brief description -------===# -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -#===----------------------------------------------------------------------===# -# -# TODO: Add full description -# -#===----------------------------------------------------------------------===# - -use strict; -use warnings; - -use AMDGPUConstants; - -my $reg_prefix = $ARGV[0]; - -for (my $i = 0; $i < CONST_REG_COUNT * 4; $i++) { - my $index = get_hw_index($i); - my $chan = get_chan_str($i); -print <<STRING; -def : Pat < - (int_AMDGPU_load_const $i), - (f32 (MOV (f32 $reg_prefix$index\_$chan))) ->; -STRING -} diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp index 2c1052fd8ea..2bdc8a759f2 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUISelLowering.cpp - TODO: Add brief description -------===// +//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This is the parent TargetLowering class for hardware code gen targets. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPUISelLowering.h b/src/gallium/drivers/radeon/AMDGPUISelLowering.h index 3c5beb1cdae..1b3f71006e2 100644 --- a/src/gallium/drivers/radeon/AMDGPUISelLowering.h +++ b/src/gallium/drivers/radeon/AMDGPUISelLowering.h @@ -1,4 +1,4 @@ -//===-- AMDGPUISelLowering.h - TODO: Add brief description -------===// +//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the interface defintiion of the TargetLowering class +// that is common to all AMD GPUs. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp index 4742283f688..ecd8ac90526 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.cpp @@ -108,9 +108,4 @@ unsigned AMDGPUInstrInfo::getISAOpcode(unsigned opcode) const } } -bool AMDGPUInstrInfo::isRegPreload(const MachineInstr &MI) const -{ - return (get(MI.getOpcode()).TSFlags >> AMDGPU_TFLAG_SHIFTS::PRELOAD_REG) & 0x1; -} - #include "AMDGPUInstrEnums.include" diff --git a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h index fa009bc6302..930b41e7191 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDGPUInstrInfo.h @@ -1,4 +1,4 @@ -//===-- AMDGPUInstrInfo.h - TODO: Add brief description -------===// +//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the definitoin of a TargetInstrInfo class that is common +// to all AMD GPUs. // //===----------------------------------------------------------------------===// @@ -21,17 +22,17 @@ namespace llvm { - class AMDGPUTargetMachine; - class MachineFunction; - class MachineInstr; - class MachineInstrBuilder; +class AMDGPUTargetMachine; +class MachineFunction; +class MachineInstr; +class MachineInstrBuilder; - class AMDGPUInstrInfo : public AMDILInstrInfo { - private: +class AMDGPUInstrInfo : public AMDILInstrInfo { +private: AMDGPUTargetMachine & TM; std::map<unsigned, unsigned> amdilToISA; - public: +public: explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm); virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; @@ -41,19 +42,9 @@ namespace llvm { virtual MachineInstr * convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const; - bool isRegPreload(const MachineInstr &MI) const; - #include "AMDGPUInstrEnums.h.include" - }; +}; } // End llvm namespace -/* AMDGPU target flags are stored in bits 32-39 */ -namespace AMDGPU_TFLAG_SHIFTS { - enum TFLAGS { - PRELOAD_REG = 32 - }; -} - - #endif // AMDGPUINSTRINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDGPUInstructions.td b/src/gallium/drivers/radeon/AMDGPUInstructions.td index 0433c8dcd95..f689356e488 100644 --- a/src/gallium/drivers/radeon/AMDGPUInstructions.td +++ b/src/gallium/drivers/radeon/AMDGPUInstructions.td @@ -1,4 +1,4 @@ -//===-- AMDGPUInstructions.td - TODO: Add brief description -------===// +//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains instruction defs that are common to all hw codegen +// targets. // //===----------------------------------------------------------------------===// @@ -16,14 +17,12 @@ include "AMDGPUInstrEnums.td" class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction { field bits<16> AMDILOp = 0; field bits<3> Gen = 0; - field bit PreloadReg = 0; let Namespace = "AMDIL"; let OutOperandList = outs; let InOperandList = ins; let AsmString = asm; let Pattern = pattern; - let TSFlags{32} = PreloadReg; let TSFlags{42-40} = Gen; let TSFlags{63-48} = AMDILOp; } @@ -37,42 +36,12 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern> let isCodeGenOnly = 1 in { - def EXPORT_REG : AMDGPUShaderInst < - (outs), - (ins GPRF32:$src), - "EXPORT_REG $src", - [(int_AMDGPU_export_reg GPRF32:$src)] - >; - - def LOAD_INPUT : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "LOAD_INPUT $dst, $src", - [] >{ - let PreloadReg = 1; - } - def MASK_WRITE : AMDGPUShaderInst < (outs), (ins GPRF32:$src), "MASK_WRITE $src", [] >; - - def RESERVE_REG : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "RESERVE_REG $dst, $src", - [(set GPRF32:$dst, (int_AMDGPU_reserve_reg imm:$src))]> { - let PreloadReg = 1; - } - - def STORE_OUTPUT: AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins GPRF32:$src0, i32imm:$src1), - "STORE_OUTPUT $dst, $src0, $src1", - [(set GPRF32:$dst, (int_AMDGPU_store_output GPRF32:$src0, imm:$src1))] - >; } /* Generic helper patterns for intrinsics */ diff --git a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td index d2cda0db936..398fd11431f 100644 --- a/src/gallium/drivers/radeon/AMDGPUIntrinsics.td +++ b/src/gallium/drivers/radeon/AMDGPUIntrinsics.td @@ -1,4 +1,4 @@ -//===-- AMDGPUIntrinsics.td - TODO: Add brief description -------===// +//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===// // // The LLVM Compiler Infrastructure // @@ -7,17 +7,16 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file defines intrinsics that are used by all hw codegen targets. // //===----------------------------------------------------------------------===// let TargetPrefix = "AMDGPU", isTarget = 1 in { - def int_AMDGPU_export_reg : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>; def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], []>; - def int_AMDGPU_reserve_reg : Intrinsic<[llvm_float_ty], [llvm_i32_ty], []>; - def int_AMDGPU_store_output : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], []>; + def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], []>; + def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], []>; def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], []>; @@ -26,7 +25,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], []>; def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; - def int_AMDGPU_kill : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; + def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; @@ -35,7 +34,7 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; - def int_AMDGPU_sge : BinaryIntFloat; + def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], []>; @@ -43,9 +42,18 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], []>; + def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>; + def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], []>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp index b49d0dddf65..2e455fea8ab 100644 --- a/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp +++ b/src/gallium/drivers/radeon/AMDGPULowerInstructions.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPULowerInstructions.cpp - TODO: Add brief description -------===// +//===-- AMDGPULowerInstructions.cpp - AMDGPU lowering pass ----------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass lowers unsupported AMDIL MachineInstrs to LLVM pseudo +// MachineInstrs for hw codegen targets. // //===----------------------------------------------------------------------===// @@ -27,7 +28,7 @@ namespace { private: static char ID; TargetMachine &TM; - void lowerVCREATE_v4f32(MachineInstr &MI, MachineBasicBlock::iterator I, + void lowerVCREATE_v4(MachineInstr &MI, MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF); public: @@ -56,8 +57,9 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) switch (MI.getOpcode()) { default: continue; - case AMDIL::VCREATE_v4f32: lowerVCREATE_v4f32(MI, I, MBB, MF); break; - + case AMDIL::VCREATE_v4f32: + case AMDIL::VCREATE_v4i32: + lowerVCREATE_v4(MI, I, MBB, MF); break; } MI.eraseFromParent(); } @@ -65,7 +67,7 @@ bool AMDGPULowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) return false; } -void AMDGPULowerInstructionsPass::lowerVCREATE_v4f32(MachineInstr &MI, +void AMDGPULowerInstructionsPass::lowerVCREATE_v4(MachineInstr &MI, MachineBasicBlock::iterator I, MachineBasicBlock &MBB, MachineFunction &MF) { MachineRegisterInfo & MRI = MF.getRegInfo(); diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp deleted file mode 100644 index d33055ccb87..00000000000 --- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===-- AMDGPULowerShaderInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - - -#include "AMDGPULowerShaderInstructions.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; - -void AMDGPULowerShaderInstructionsPass::preloadRegister(MachineFunction * MF, - const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) const -{ - if (!MRI->isLiveIn(physReg)) { - MRI->addLiveIn(physReg, virtReg); - MachineBasicBlock &EntryMBB = MF->front(); - BuildMI(MF->front(), EntryMBB.begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - virtReg) - .addReg(physReg); - } else { - /* We can't mark the same register as preloaded twice, but we still must - * associate virtReg with the correct preloaded register. */ - unsigned newReg = MRI->getLiveInVirtReg(physReg); - MRI->replaceRegWith(virtReg, newReg); - } -} diff --git a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h b/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h deleted file mode 100644 index 5ee77fafe2b..00000000000 --- a/src/gallium/drivers/radeon/AMDGPULowerShaderInstructions.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- AMDGPULowerShaderInstructions.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - - -#ifndef AMDGPU_LOWER_SHADER_INSTRUCTIONS -#define AMDGPU_LOWER_SHADER_INSTRUCTIONS - -namespace llvm { - -class MachineFunction; -class MachineRegisterInfo; -class TargetInstrInfo; - -class AMDGPULowerShaderInstructionsPass { - - protected: - MachineRegisterInfo * MRI; - /** - * @param physReg The physical register that will be preloaded. - * @param virtReg The virtual register that currently holds the - * preloaded value. - */ - void preloadRegister(MachineFunction * MF, const TargetInstrInfo * TII, - unsigned physReg, unsigned virtReg) const; -}; - -} // end namespace llvm - - -#endif // AMDGPU_LOWER_SHADER_INSTRUCTIONS diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp index 162a49116a0..ad48335fd33 100644 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp +++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPURegisterInfo.cpp - TODO: Add brief description -------===// +//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Parent TargetRegisterInfo class common to all hw codegen targets. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h index f4492e9795d..d545c06f69e 100644 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.h +++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.h @@ -1,4 +1,4 @@ -//===-- AMDGPURegisterInfo.h - TODO: Add brief description -------===// +//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the TargetRegisterInfo interface that is implemented +// by all hw codegen targets. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td index 173d6622569..1707903ae7e 100644 --- a/src/gallium/drivers/radeon/AMDGPURegisterInfo.td +++ b/src/gallium/drivers/radeon/AMDGPURegisterInfo.td @@ -1,4 +1,4 @@ -//===-- AMDGPURegisterInfo.td - TODO: Add brief description -------===// +//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Tablegen register definitions common to all hw codegen targets. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp b/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp deleted file mode 100644 index c923f19c39f..00000000000 --- a/src/gallium/drivers/radeon/AMDGPUReorderPreloadInstructions.cpp +++ /dev/null @@ -1,66 +0,0 @@ -//===-- AMDGPUReorderPreloadInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDIL.h" -#include "AMDILInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Function.h" - -using namespace llvm; - -namespace { - class AMDGPUReorderPreloadInstructionsPass : public MachineFunctionPass { - - private: - static char ID; - TargetMachine &TM; - - public: - AMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "AMDGPU Reorder Preload Instructions"; } - }; -} /* End anonymous namespace */ - -char AMDGPUReorderPreloadInstructionsPass::ID = 0; - -FunctionPass *llvm::createAMDGPUReorderPreloadInstructionsPass(TargetMachine &tm) { - return new AMDGPUReorderPreloadInstructionsPass(tm); -} - -/* This pass moves instructions that represent preloaded registers to the - * start of the program. */ -bool AMDGPUReorderPreloadInstructionsPass::runOnMachineFunction(MachineFunction &MF) -{ - const AMDGPUInstrInfo * TII = - static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo()); - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I) ) { - MachineInstr &MI = *I; - if (TII->isRegPreload(MI)) { - MF.front().insert(MF.front().begin(), MI.removeFromParent()); - } - } - } - return false; -} diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp index 313349ce01b..c1c21abc9c1 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUTargetMachine.cpp - TODO: Add brief description -------===// +//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The AMDGPU target machine contains all of the hardware specific information +// needed to emit code for R600 and SI GPUs. // //===----------------------------------------------------------------------===// @@ -16,7 +17,6 @@ #include "AMDILTargetMachine.h" #include "R600ISelLowering.h" #include "R600InstrInfo.h" -#include "R600KernelParameters.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" #include "llvm/Analysis/Passes.h" @@ -112,31 +112,28 @@ AMDGPUPassConfig::addPreISel() { const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - PM.add(createR600KernelParametersPass( + PM->add(createR600KernelParametersPass( getAMDGPUTargetMachine().getTargetData())); } return false; } bool AMDGPUPassConfig::addInstSelector() { - PM.add(createAMDILPeepholeOpt(*TM)); - PM.add(createAMDILISelDag(getAMDGPUTargetMachine())); + PM->add(createAMDILPeepholeOpt(*TM)); + PM->add(createAMDILISelDag(getAMDGPUTargetMachine())); return false; } bool AMDGPUPassConfig::addPreRegAlloc() { const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); - PM.add(createAMDGPUReorderPreloadInstructionsPass(*TM)); if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) { - PM.add(createR600LowerShaderInstructionsPass(*TM)); - PM.add(createR600LowerInstructionsPass(*TM)); + PM->add(createR600LowerInstructionsPass(*TM)); } else { - PM.add(createSILowerShaderInstructionsPass(*TM)); - PM.add(createSIAssignInterpRegsPass(*TM)); + PM->add(createSIAssignInterpRegsPass(*TM)); } - PM.add(createAMDGPULowerInstructionsPass(*TM)); - PM.add(createAMDGPUConvertToISAPass(*TM)); + PM->add(createAMDGPULowerInstructionsPass(*TM)); + PM->add(createAMDGPUConvertToISAPass(*TM)); return false; } @@ -150,10 +147,10 @@ bool AMDGPUPassConfig::addPreSched2() { bool AMDGPUPassConfig::addPreEmitPass() { const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>(); - PM.add(createAMDILCFGPreparationPass(*TM)); - PM.add(createAMDILCFGStructurizerPass(*TM)); + PM->add(createAMDILCFGPreparationPass(*TM)); + PM->add(createAMDILCFGStructurizerPass(*TM)); if (ST.device()->getGeneration() == AMDILDeviceInfo::HD7XXX) { - PM.add(createSIPropagateImmReadsPass(*TM)); + PM->add(createSIPropagateImmReadsPass(*TM)); } return false; diff --git a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h index d4165b09e84..2428fe638a7 100644 --- a/src/gallium/drivers/radeon/AMDGPUTargetMachine.h +++ b/src/gallium/drivers/radeon/AMDGPUTargetMachine.h @@ -1,4 +1,4 @@ -//===-- AMDGPUTargetMachine.h - TODO: Add brief description -------===// +//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The AMDGPU TargetMachine interface definition for hw codgen targets. // //===----------------------------------------------------------------------===// @@ -52,9 +52,6 @@ public: formatted_raw_ostream &Out, CodeGenFileType FileType, bool DisableVerify); -public: - void dumpCode() { mDump = true; } - bool shouldDumpCode() const { return mDump; } }; } /* End namespace llvm */ diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.cpp b/src/gallium/drivers/radeon/AMDGPUUtil.cpp index a5045436ab4..bd8f5eef697 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.cpp +++ b/src/gallium/drivers/radeon/AMDGPUUtil.cpp @@ -1,4 +1,4 @@ -//===-- AMDGPUUtil.cpp - TODO: Add brief description -------===// +//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,39 +7,39 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Common utility functions used by hw codegen targets // //===----------------------------------------------------------------------===// #include "AMDGPUUtil.h" #include "AMDGPURegisterInfo.h" #include "AMDIL.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -/* Some instructions act as place holders to emulate operations that the GPU - * hardware does automatically. This function can be used to check if - * an opcode falls into this category. */ -bool llvm::isPlaceHolderOpcode(unsigned opcode) +// Some instructions act as place holders to emulate operations that the GPU +// hardware does automatically. This function can be used to check if +// an opcode falls into this category. +bool AMDGPU::isPlaceHolderOpcode(unsigned opcode) { switch (opcode) { default: return false; - case AMDIL::EXPORT_REG: case AMDIL::RETURN: case AMDIL::LOAD_INPUT: case AMDIL::LAST: + case AMDIL::MASK_WRITE: case AMDIL::RESERVE_REG: return true; } } -bool llvm::isTransOp(unsigned opcode) +bool AMDGPU::isTransOp(unsigned opcode) { switch(opcode) { default: return false; @@ -67,10 +67,12 @@ bool llvm::isTransOp(unsigned opcode) } } -bool llvm::isTexOp(unsigned opcode) +bool AMDGPU::isTexOp(unsigned opcode) { switch(opcode) { default: return false; + case AMDIL::TEX_LD: + case AMDIL::TEX_GET_TEXTURE_RESINFO: case AMDIL::TEX_SAMPLE: case AMDIL::TEX_SAMPLE_C: case AMDIL::TEX_SAMPLE_L: @@ -79,11 +81,13 @@ bool llvm::isTexOp(unsigned opcode) case AMDIL::TEX_SAMPLE_C_LB: case AMDIL::TEX_SAMPLE_G: case AMDIL::TEX_SAMPLE_C_G: + case AMDIL::TEX_GET_GRADIENTS_H: + case AMDIL::TEX_GET_GRADIENTS_V: return true; } } -bool llvm::isReductionOp(unsigned opcode) +bool AMDGPU::isReductionOp(unsigned opcode) { switch(opcode) { default: return false; @@ -93,13 +97,25 @@ bool llvm::isReductionOp(unsigned opcode) } } -bool llvm::isFCOp(unsigned opcode) +bool AMDGPU::isCubeOp(unsigned opcode) +{ + switch(opcode) { + default: return false; + case AMDIL::CUBE_r600: + case AMDIL::CUBE_eg: + return true; + } +} + + +bool AMDGPU::isFCOp(unsigned opcode) { switch(opcode) { default: return false; case AMDIL::BREAK_LOGICALZ_f32: case AMDIL::BREAK_LOGICALNZ_i32: case AMDIL::BREAK_LOGICALZ_i32: + case AMDIL::BREAK_LOGICALNZ_f32: case AMDIL::CONTINUE_LOGICALNZ_f32: case AMDIL::IF_LOGICALNZ_i32: case AMDIL::IF_LOGICALZ_f32: @@ -112,11 +128,14 @@ bool llvm::isFCOp(unsigned opcode) } } -void AMDGPU::utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI, - const struct TargetInstrInfo * TII, unsigned physReg, unsigned virtReg) +void AMDGPU::utilAddLiveIn(llvm::MachineFunction * MF, + llvm::MachineRegisterInfo & MRI, + const struct llvm::TargetInstrInfo * TII, + unsigned physReg, unsigned virtReg) { if (!MRI.isLiveIn(physReg)) { MRI.addLiveIn(physReg, virtReg); + MF->front().addLiveIn(physReg); BuildMI(MF->front(), MF->front().begin(), DebugLoc(), TII->get(TargetOpcode::COPY), virtReg) .addReg(physReg); diff --git a/src/gallium/drivers/radeon/AMDGPUUtil.h b/src/gallium/drivers/radeon/AMDGPUUtil.h index 299146e1ba7..15f2ce57af9 100644 --- a/src/gallium/drivers/radeon/AMDGPUUtil.h +++ b/src/gallium/drivers/radeon/AMDGPUUtil.h @@ -1,4 +1,4 @@ -//===-- AMDGPUUtil.h - TODO: Add brief description -------===// +//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,43 +7,40 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Declarations for utility functions common to all hw codegen targets. // //===----------------------------------------------------------------------===// #ifndef AMDGPU_UTIL_H #define AMDGPU_UTIL_H -#include "AMDGPURegisterInfo.h" -#include "llvm/Support/DataTypes.h" - namespace llvm { -class AMDILMachineFunctionInfo; +class MachineFunction; +class MachineRegisterInfo; +class TargetInstrInfo; + +} -class TargetMachine; -class TargetRegisterInfo; +namespace AMDGPU { bool isPlaceHolderOpcode(unsigned opcode); bool isTransOp(unsigned opcode); bool isTexOp(unsigned opcode); bool isReductionOp(unsigned opcode); +bool isCubeOp(unsigned opcode); bool isFCOp(unsigned opcode); -/* XXX: Move these to AMDGPUInstrInfo.h */ +// XXX: Move these to AMDGPUInstrInfo.h #define MO_FLAG_CLAMP (1 << 0) #define MO_FLAG_NEG (1 << 1) #define MO_FLAG_ABS (1 << 2) #define MO_FLAG_MASK (1 << 3) -} /* End namespace llvm */ - -namespace AMDGPU { - void utilAddLiveIn(llvm::MachineFunction * MF, llvm::MachineRegisterInfo & MRI, const struct llvm::TargetInstrInfo * TII, unsigned physReg, unsigned virtReg); } // End namespace AMDGPU -#endif /* AMDGPU_UTIL_H */ +#endif // AMDGPU_UTIL_H diff --git a/src/gallium/drivers/radeon/AMDIL.h b/src/gallium/drivers/radeon/AMDIL.h index 317ea124f66..6759ccd9527 100644 --- a/src/gallium/drivers/radeon/AMDIL.h +++ b/src/gallium/drivers/radeon/AMDIL.h @@ -137,11 +137,6 @@ enum AddressSpaces { LAST_ADDRESS = 8 }; -// We are piggybacking on the CommentFlag enum in MachineInstr.h to -// set bits in AsmPrinterFlags of the MachineInstruction. We will -// start at bit 16 and allocate down while LLVM will start at bit -// 1 and allocate up. - // This union/struct combination is an easy way to read out the // exact bits that are needed. typedef union ResourceRec { @@ -181,26 +176,6 @@ typedef union ResourceRec { } // namespace AMDILAS -// The OpSwizzle encodes a subset of all possible -// swizzle combinations into a number of bits using -// only the combinations utilized by the backend. -// The lower 128 are for source swizzles and the -// upper 128 or for destination swizzles. -// The valid mappings can be found in the -// getSrcSwizzle and getDstSwizzle functions of -// AMDILUtilityFunctions.cpp. -typedef union SwizzleRec { - struct { -#ifdef __BIG_ENDIAN__ - unsigned char dst : 1; - unsigned char swizzle : 7; -#else - unsigned char swizzle : 7; - unsigned char dst : 1; -#endif - } bits; - unsigned char u8all; -} OpSwizzle; // Enums corresponding to AMDIL condition codes for IL. These // values must be kept in sync with the ones in the .td file. namespace AMDILCC { diff --git a/src/gallium/drivers/radeon/AMDIL.td b/src/gallium/drivers/radeon/AMDIL.td index 9bcccac2411..deee290fad5 100644 --- a/src/gallium/drivers/radeon/AMDIL.td +++ b/src/gallium/drivers/radeon/AMDIL.td @@ -1,4 +1,4 @@ -//===-- AMDIL.td - TODO: Add brief description -------===// +//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp index 6625dd77d5f..d7c96573a15 100644 --- a/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp +++ b/src/gallium/drivers/radeon/AMDIL7XXDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDIL7XXDevice.cpp - TODO: Add brief description -------===// +//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILBase.td b/src/gallium/drivers/radeon/AMDILBase.td index 2706b211f2d..31ebed31d72 100644 --- a/src/gallium/drivers/radeon/AMDILBase.td +++ b/src/gallium/drivers/radeon/AMDILBase.td @@ -60,6 +60,11 @@ def FeatureDebug : SubtargetFeature<"debug", "CapsOverride[AMDILDeviceInfo::Debug]", "true", "Debug mode is enabled, so disable hardware accelerated address spaces.">; +def FeatureDumpCode : SubtargetFeature <"DumpCode", + "mDumpCode", + "true", + "Dump MachineInstrs in the CodeEmitter">; + //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions diff --git a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp index 289af6f210e..cdcd5e89880 100644 --- a/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp +++ b/src/gallium/drivers/radeon/AMDILCFGStructurizer.cpp @@ -7,22 +7,22 @@ // //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "structcfg" -#ifdef DEBUG -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else #define DEBUGME 0 -#endif +#define DEBUG_TYPE "structcfg" #include "AMDILTargetMachine.h" #include "AMDILUtilityFunctions.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -30,8 +30,6 @@ #define FirstNonDebugInstr(A) A->begin() using namespace llvm; -// bixia TODO: move this out to analysis lib. Make this work for both target -// AMDIL and CBackend. // TODO: move-begin. //===----------------------------------------------------------------------===// @@ -109,23 +107,6 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { // //===----------------------------------------------------------------------===// -#include "AMDILTargetMachine.h" -#include "AMDILUtilityFunctions.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/DominatorInternals.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - namespace llvm { /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used @@ -3156,10 +3137,6 @@ struct CFGStructTraits<AMDILCFGStructurizer> iterEnd = srcBlk->end(); iter != iterEnd; ++iter) { MachineInstr *instr = func->CloneMachineInstr(iter); - // This is a workaround for LLVM bugzilla 8420 because CloneMachineInstr - // does not clone the AsmPrinterFlags. - instr->setAsmPrinterFlag( - (llvm::MachineInstr::CommentFlag)iter->getAsmPrinterFlags()); newBlk->push_back(instr); } return newBlk; diff --git a/src/gallium/drivers/radeon/AMDILCodeEmitter.h b/src/gallium/drivers/radeon/AMDILCodeEmitter.h index b0ea1455cf9..fa46cbd203d 100644 --- a/src/gallium/drivers/radeon/AMDILCodeEmitter.h +++ b/src/gallium/drivers/radeon/AMDILCodeEmitter.h @@ -1,23 +1,21 @@ -// The LLVM Compiler Infrastructure +//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===// -//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===// -//===-- AMDILCodeEmitter.h - TODO: Add brief description -------===// +//===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure +// CodeEmitter interface for R600 and SI codegen. // +//===----------------------------------------------------------------------===// #ifndef AMDILCODEEMITTER_H #define AMDILCODEEMITTER_H namespace llvm { - /* XXX: Temp HACK to work around tablegen name generation */ class AMDILCodeEmitter { public: uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; diff --git a/src/gallium/drivers/radeon/AMDILConversions.td b/src/gallium/drivers/radeon/AMDILConversions.td index 0db66ae8475..1bc5e4ddf37 100644 --- a/src/gallium/drivers/radeon/AMDILConversions.td +++ b/src/gallium/drivers/radeon/AMDILConversions.td @@ -1,4 +1,4 @@ -//===-- AMDILConversions.td - TODO: Add brief description -------===// +//==- AMDILConversions.td - Type conversion tablegen patterns -*-tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILDevice.cpp b/src/gallium/drivers/radeon/AMDILDevice.cpp index aa6d8af7012..4294a8bef0c 100644 --- a/src/gallium/drivers/radeon/AMDILDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDILDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp index 89b8312c294..cbf5b512471 100644 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp +++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.cpp @@ -1,4 +1,4 @@ -//===-- AMDILDeviceInfo.cpp - TODO: Add brief description -------===// +//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,16 @@ // License. See LICENSE.TXT for details. // //==-----------------------------------------------------------------------===// +// +// Function that creates DeviceInfo from a device name and other information. +// +//==-----------------------------------------------------------------------===// #include "AMDILDevices.h" #include "AMDILSubtarget.h" using namespace llvm; namespace llvm { +namespace AMDILDeviceInfo { AMDILDevice* getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit) { @@ -84,4 +89,5 @@ getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64b return new AMDIL7XXDevice(ptr); } } -} +} // End namespace AMDILDeviceInfo +} // End namespace llvm diff --git a/src/gallium/drivers/radeon/AMDILDeviceInfo.h b/src/gallium/drivers/radeon/AMDILDeviceInfo.h index c4acf9145ae..06ac4322d0f 100644 --- a/src/gallium/drivers/radeon/AMDILDeviceInfo.h +++ b/src/gallium/drivers/radeon/AMDILDeviceInfo.h @@ -1,4 +1,4 @@ -//===-- AMDILDeviceInfo.h - TODO: Add brief description -------===// +//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===// // // The LLVM Compiler Infrastructure // @@ -82,8 +82,8 @@ namespace llvm }; + AMDILDevice* + getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false); } // namespace AMDILDeviceInfo - llvm::AMDILDevice* - getDeviceFromName(const std::string &name, llvm::AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false); } // namespace llvm #endif // _AMDILDEVICEINFO_H_ diff --git a/src/gallium/drivers/radeon/AMDILDevices.h b/src/gallium/drivers/radeon/AMDILDevices.h index 3fc5fa05669..cfcc3304b4b 100644 --- a/src/gallium/drivers/radeon/AMDILDevices.h +++ b/src/gallium/drivers/radeon/AMDILDevices.h @@ -1,4 +1,4 @@ -//===-- AMDILDevices.h - TODO: Add brief description -------===// +//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td index 445fd608bbb..f10936b8c6c 100644 --- a/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td +++ b/src/gallium/drivers/radeon/AMDILEnumeratedTypes.td @@ -1,4 +1,4 @@ -//===-- AMDILEnumeratedTypes.td - TODO: Add brief description -------===// +//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp index 7b5c52345d2..779b2d3df2f 100644 --- a/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILEvergreenDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDILEvergreenDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp index ff04d9d55bf..b8898828dd6 100644 --- a/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp +++ b/src/gallium/drivers/radeon/AMDILISelDAGToDAG.cpp @@ -13,9 +13,12 @@ #include "AMDILDevices.h" #include "AMDILTargetMachine.h" #include "AMDILUtilityFunctions.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" +#include <list> +#include <queue> using namespace llvm; @@ -35,13 +38,21 @@ class AMDILDAGToDAGISel : public SelectionDAGISel { public: AMDILDAGToDAGISel(AMDILTargetMachine &TM AMDIL_OPT_LEVEL_DECL); virtual ~AMDILDAGToDAGISel(); - inline SDValue getSmallIPtrImm(unsigned Imm); SDNode *Select(SDNode *N); + virtual const char *getPassName() const; + +private: + inline SDValue getSmallIPtrImm(unsigned Imm); + // Complex pattern selectors bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); + + static bool checkType(const Value *ptr, unsigned int addrspace); + static const Value *getBasePointerValue(const Value *V); + static bool isGlobalStore(const StoreSDNode *N); static bool isPrivateStore(const StoreSDNode *N); static bool isLocalStore(const StoreSDNode *N); @@ -54,8 +65,6 @@ public: static bool isLocalLoad(const LoadSDNode *N); static bool isRegionLoad(const LoadSDNode *N); - virtual const char *getPassName() const; -private: SDNode *xformAtomicInst(SDNode *N); // Include the pieces autogenerated from the target description. @@ -165,26 +174,75 @@ SDNode *AMDILDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { + if (!ptr) { + return false; + } + Type *ptrType = ptr->getType(); + return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; +} + +const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V) +{ + if (!V) { + return NULL; + } + const Value *ret = NULL; + ValueMap<const Value *, bool> ValueBitMap; + std::queue<const Value *, std::list<const Value *> > ValueQueue; + ValueQueue.push(V); + while (!ValueQueue.empty()) { + V = ValueQueue.front(); + if (ValueBitMap.find(V) == ValueBitMap.end()) { + ValueBitMap[V] = true; + if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { + ret = V; + break; + } else if (dyn_cast<GlobalVariable>(V)) { + ret = V; + break; + } else if (dyn_cast<Constant>(V)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); + if (CE) { + ValueQueue.push(CE->getOperand(0)); + } + } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + ret = AI; + break; + } else if (const Instruction *I = dyn_cast<Instruction>(V)) { + uint32_t numOps = I->getNumOperands(); + for (uint32_t x = 0; x < numOps; ++x) { + ValueQueue.push(I->getOperand(x)); + } + } else { + // assert(0 && "Found a Value that we didn't know how to handle!"); + } + } + ValueQueue.pop(); + } + return ret; +} + bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); } bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { - return (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS)); + return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)); } bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); } bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS); } bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { - if (check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) { + if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) { return true; } MachineMemOperand *MMO = N->getMemOperand(); @@ -195,27 +253,27 @@ bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) { && ((V && dyn_cast<GlobalValue>(V)) || (BV && dyn_cast<GlobalValue>( getBasePointerValue(MMO->getValue()))))) { - return check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS); } else { return false; } } bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS); } bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS); } bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) { - return check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS); + return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS); } bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) { MachineMemOperand *MMO = N->getMemOperand(); - if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { if (MMO) { const Value *V = MMO->getValue(); const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V); @@ -228,19 +286,19 @@ bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) { } bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) { - if (check_type(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { + if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) { // Check to make sure we are not a constant pool load or a constant load // that is marked as a private load if (isCPLoad(N) || isConstantLoad(N, -1)) { return false; } } - if (!check_type(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::REGION_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS) - && !check_type(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS)) + if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS) + && !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS)) { return true; } diff --git a/src/gallium/drivers/radeon/AMDILISelLowering.cpp b/src/gallium/drivers/radeon/AMDILISelLowering.cpp index 54c6ea65065..19b12fcf72b 100644 --- a/src/gallium/drivers/radeon/AMDILISelLowering.cpp +++ b/src/gallium/drivers/radeon/AMDILISelLowering.cpp @@ -623,6 +623,48 @@ translateToOpcode(uint64_t CCCode, unsigned int regClass) assert(0 && "Unknown opcode retrieved"); return 0; } + +/// Helper function used by LowerFormalArguments +static const TargetRegisterClass* +getRegClassFromType(unsigned int type) { + switch (type) { + default: + assert(0 && "Passed in type does not match any register classes."); + case MVT::i8: + return &AMDIL::GPRI8RegClass; + case MVT::i16: + return &AMDIL::GPRI16RegClass; + case MVT::i32: + return &AMDIL::GPRI32RegClass; + case MVT::f32: + return &AMDIL::GPRF32RegClass; + case MVT::i64: + return &AMDIL::GPRI64RegClass; + case MVT::f64: + return &AMDIL::GPRF64RegClass; + case MVT::v4f32: + return &AMDIL::GPRV4F32RegClass; + case MVT::v4i8: + return &AMDIL::GPRV4I8RegClass; + case MVT::v4i16: + return &AMDIL::GPRV4I16RegClass; + case MVT::v4i32: + return &AMDIL::GPRV4I32RegClass; + case MVT::v2f32: + return &AMDIL::GPRV2F32RegClass; + case MVT::v2i8: + return &AMDIL::GPRV2I8RegClass; + case MVT::v2i16: + return &AMDIL::GPRV2I16RegClass; + case MVT::v2i32: + return &AMDIL::GPRV2I32RegClass; + case MVT::v2f64: + return &AMDIL::GPRV2F64RegClass; + case MVT::v2i64: + return &AMDIL::GPRV2I64RegClass; + } +} + SDValue AMDILTargetLowering::LowerMemArgument( SDValue Chain, @@ -2189,6 +2231,7 @@ AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); return Result; } + /// LowerFORMAL_ARGUMENTS - transform physical registers into /// virtual registers and generate load operations for /// arguments places on the stack. @@ -3191,7 +3234,7 @@ AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (RST == MVT::f64 && RHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3248,7 +3291,7 @@ AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (RST == MVT::f64 && RHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3314,7 +3357,7 @@ AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (stm->calVersion() >= CAL_VERSION_SC_135) { // unsigned x = RHS; @@ -3489,7 +3532,7 @@ AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (LST == MVT::f64 && LHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3543,7 +3586,7 @@ AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const amdtm = reinterpret_cast<const AMDILTargetMachine*> (&this->getTargetMachine()); const AMDILSubtarget* - stm = dynamic_cast<const AMDILSubtarget*>( + stm = static_cast<const AMDILSubtarget*>( amdtm->getSubtargetImpl()); if (LST == MVT::f64 && LHSVT.isVector() && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { @@ -3843,7 +3886,6 @@ SDValue AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const { EVT VT = Op.getValueType(); - //printSDValue(Op, 1); SDValue Nodes1; SDValue second; SDValue third; @@ -3965,7 +4007,6 @@ AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - //printSDValue(Op, 1); const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); uint64_t swizzleNum = 0; DebugLoc DL = Op.getDebugLoc(); @@ -4782,7 +4823,7 @@ uint32_t AMDILTargetLowering::genVReg(uint32_t regType) const { return mBB->getParent()->getRegInfo().createVirtualRegister( - getRegClassFromID(regType)); + getTargetMachine().getRegisterInfo()->getRegClass(regType)); } MachineInstrBuilder diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp index fbc3e45b357..cd2fb48209c 100644 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.cpp +++ b/src/gallium/drivers/radeon/AMDILInstrInfo.cpp @@ -10,13 +10,10 @@ // This file contains the AMDIL implementation of the TargetInstrInfo class. // //===----------------------------------------------------------------------===// -#include "AMDILInstrInfo.h" -#include "AMDILUtilityFunctions.h" - -#define GET_INSTRINFO_CTOR -#include "AMDILGenInstrInfo.inc" #include "AMDILInstrInfo.h" +#include "AMDIL.h" +#include "AMDILISelLowering.h" #include "AMDILUtilityFunctions.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -24,6 +21,9 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Instructions.h" +#define GET_INSTRINFO_CTOR +#include "AMDILGenInstrInfo.inc" + using namespace llvm; AMDILInstrInfo::AMDILInstrInfo(AMDILTargetMachine &tm) @@ -36,28 +36,6 @@ const AMDILRegisterInfo &AMDILInstrInfo::getRegisterInfo() const { return RI; } -/// Return true if the instruction is a register to register move and leave the -/// source and dest operands in the passed parameters. -bool AMDILInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg, - unsigned int &DstReg, unsigned int &SrcSubIdx, - unsigned int &DstSubIdx) const { - // FIXME: we should look for: - // add with 0 - //assert(0 && "is Move Instruction has not been implemented yet!"); - //return true; - if (!isMove(MI.getOpcode())) { - return false; - } - if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg()) { - return false; - } - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - DstSubIdx = 0; - SrcSubIdx = 0; - return true; -} - bool AMDILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const { @@ -99,22 +77,7 @@ bool AMDILInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI, // TODO: Implement this function return false; } -#if 0 -void -AMDILInstrInfo::reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { -// TODO: Implement this function -} -MachineInst AMDILInstrInfo::duplicate(MachineInstr *Orig, - MachineFunction &MF) const { -// TODO: Implement this function - return NULL; -} -#endif MachineInstr * AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, @@ -122,25 +85,6 @@ AMDILInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // TODO: Implement this function return NULL; } -#if 0 -MachineInst AMDILInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI = false) const { -// TODO: Implement this function - return NULL; -} -bool -AMDILInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const -{ -// TODO: Implement this function -} -bool -AMDILInstrInfo::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const -{ -// TODO: Implement this function -} -#endif bool AMDILInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, MachineBasicBlock &MBB) const { while (iter != MBB.end()) { @@ -299,43 +243,6 @@ MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) { return MBB->end(); } -bool -AMDILInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - // If we are adding to the end of a basic block we can safely assume that the - // move is caused by a PHI node since all move instructions that are non-PHI - // have already been inserted into the basic blocks Therefor we call the skip - // flow control instruction to move the iterator before the flow control - // instructions and put the move instruction there. - bool phi = (DestReg < 1025) || (SrcReg < 1025); - int movInst = phi ? getMoveInstFromID(DestRC->getID()) - : getPHIMoveInstFromID(DestRC->getID()); - - MachineBasicBlock::iterator iTemp = (I == MBB.end()) ? skipFlowControl(&MBB) - : I; - if (DestRC != SrcRC) { - //int convInst; - size_t dSize = DestRC->getSize(); - size_t sSize = SrcRC->getSize(); - if (dSize > sSize) { - // Elements are going to get duplicated. - BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } else if (dSize == sSize) { - // Direct copy, conversions are not handled. - BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } else if (dSize < sSize) { - // Elements are going to get dropped. - BuildMI(MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } - } else { - BuildMI( MBB, iTemp, DL, get(movInst), DestReg).addReg(SrcReg); - } - return true; -} void AMDILInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, @@ -427,15 +334,11 @@ AMDILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, if (MI != MBB.end()) { DL = MI->getDebugLoc(); } - MachineInstr *nMI = BuildMI(MBB, MI, DL, get(Opc)) + BuildMI(MBB, MI, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FrameIndex) .addMemOperand(MMO) .addImm(0); - AMDILAS::InstrResEnc curRes; - curRes.bits.ResourceID - = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID); - setAsmPrinterFlags(nMI, curRes); } void @@ -511,16 +414,11 @@ AMDILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, if (MI != MBB.end()) { DL = MI->getDebugLoc(); } - MachineInstr* nMI = BuildMI(MBB, MI, DL, get(Opc)) + BuildMI(MBB, MI, DL, get(Opc)) .addReg(DestReg, RegState::Define) .addFrameIndex(FrameIndex) .addMemOperand(MMO) .addImm(0); - AMDILAS::InstrResEnc curRes; - curRes.bits.ResourceID - = TM.getSubtargetImpl()->device()->getResourceID(AMDILDevice::SCRATCH_ID); - setAsmPrinterFlags(nMI, curRes); - } MachineInstr * AMDILInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -569,65 +467,6 @@ AMDILInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, return 0; } -bool -AMDILInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, - int64_t &Offset1, - int64_t &Offset2) const { - return false; - if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) { - return false; - } - const MachineSDNode *mload1 = dyn_cast<MachineSDNode>(Load1); - const MachineSDNode *mload2 = dyn_cast<MachineSDNode>(Load2); - if (!mload1 || !mload2) { - return false; - } - if (mload1->memoperands_empty() || - mload2->memoperands_empty()) { - return false; - } - MachineMemOperand *memOp1 = (*mload1->memoperands_begin()); - MachineMemOperand *memOp2 = (*mload2->memoperands_begin()); - const Value *mv1 = memOp1->getValue(); - const Value *mv2 = memOp2->getValue(); - if (!memOp1->isLoad() || !memOp2->isLoad()) { - return false; - } - if (getBasePointerValue(mv1) == getBasePointerValue(mv2)) { - if (isa<GetElementPtrInst>(mv1) && isa<GetElementPtrInst>(mv2)) { - const GetElementPtrInst *gep1 = dyn_cast<GetElementPtrInst>(mv1); - const GetElementPtrInst *gep2 = dyn_cast<GetElementPtrInst>(mv2); - if (!gep1 || !gep2) { - return false; - } - if (gep1->getNumOperands() != gep2->getNumOperands()) { - return false; - } - for (unsigned i = 0, e = gep1->getNumOperands() - 1; i < e; ++i) { - const Value *op1 = gep1->getOperand(i); - const Value *op2 = gep2->getOperand(i); - if (op1 != op2) { - // If any value except the last one is different, return false. - return false; - } - } - unsigned size = gep1->getNumOperands()-1; - if (!isa<ConstantInt>(gep1->getOperand(size)) - || !isa<ConstantInt>(gep2->getOperand(size))) { - return false; - } - Offset1 = dyn_cast<ConstantInt>(gep1->getOperand(size))->getSExtValue(); - Offset2 = dyn_cast<ConstantInt>(gep2->getOperand(size))->getSExtValue(); - return true; - } else if (isa<Argument>(mv1) && isa<Argument>(mv2)) { - return false; - } else if (isa<GlobalValue>(mv1) && isa<GlobalValue>(mv2)) { - return false; - } - } - return false; -} - bool AMDILInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const { @@ -654,16 +493,6 @@ bool AMDILInstrInfo::isPredicated(const MachineInstr *MI) const { // TODO: Implement this function return false; } -#if 0 -bool AMDILInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - // TODO: Implement this function -} - -bool AMDILInstrInfo::PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const { - // TODO: Implement this function -} -#endif bool AMDILInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, const SmallVectorImpl<MachineOperand> &Pred2) @@ -689,21 +518,112 @@ AMDILInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { return true; } -unsigned AMDILInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - // TODO: Implement this function - return 0; +bool AMDILInstrInfo::isLoadInst(MachineInstr *MI) const { + if (strstr(getName(MI->getOpcode()), "LOADCONST")) { + return false; + } + return strstr(getName(MI->getOpcode()), "LOAD"); } -#if 0 -unsigned -AMDILInstrInfo::GetFunctionSizeInBytes(const MachineFunction &MF) const { - // TODO: Implement this function - return 0; +bool AMDILInstrInfo::isSWSExtLoadInst(MachineInstr *MI) const +{ +switch (MI->getOpcode()) { + default: + break; + ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD); + ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD); + ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD); + ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD); + ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD); + ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD); + return true; + }; + return false; } -unsigned AMDILInstrInfo::getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const { - // TODO: Implement this function - return 0; +bool AMDILInstrInfo::isExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "EXTLOAD"); +} + +bool AMDILInstrInfo::isSExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "SEXTLOAD"); +} + +bool AMDILInstrInfo::isAExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "AEXTLOAD"); +} + +bool AMDILInstrInfo::isZExtLoadInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "ZEXTLOAD"); +} + +bool AMDILInstrInfo::isStoreInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "STORE"); +} + +bool AMDILInstrInfo::isTruncStoreInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "TRUNCSTORE"); +} + +bool AMDILInstrInfo::isAtomicInst(MachineInstr *MI) const { + return strstr(getName(MI->getOpcode()), "ATOM"); +} + +bool AMDILInstrInfo::isVolatileInst(MachineInstr *MI) const { + if (!MI->memoperands_empty()) { + for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(), + moe = MI->memoperands_end(); mob != moe; ++mob) { + // If there is a volatile mem operand, this is a volatile instruction. + if ((*mob)->isVolatile()) { + return true; + } + } + } + return false; +} +bool AMDILInstrInfo::isGlobalInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "GLOBAL"); +} +bool AMDILInstrInfo::isPrivateInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "PRIVATE"); +} +bool AMDILInstrInfo::isConstantInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "CONSTANT") + || strstr(getName(MI->getOpcode()), "CPOOL"); +} +bool AMDILInstrInfo::isRegionInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "REGION"); +} +bool AMDILInstrInfo::isLocalInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "LOCAL"); +} +bool AMDILInstrInfo::isImageInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "IMAGE"); +} +bool AMDILInstrInfo::isAppendInst(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "APPEND"); +} +bool AMDILInstrInfo::isRegionAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_R"); +} +bool AMDILInstrInfo::isLocalAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_L"); +} +bool AMDILInstrInfo::isGlobalAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_G") + || isArenaAtomic(MI); +} +bool AMDILInstrInfo::isArenaAtomic(llvm::MachineInstr *MI) const +{ + return strstr(getName(MI->getOpcode()), "ATOM_A"); } -#endif diff --git a/src/gallium/drivers/radeon/AMDILInstrInfo.h b/src/gallium/drivers/radeon/AMDILInstrInfo.h index 88dd4e9441a..4121246e6f9 100644 --- a/src/gallium/drivers/radeon/AMDILInstrInfo.h +++ b/src/gallium/drivers/radeon/AMDILInstrInfo.h @@ -40,12 +40,6 @@ public: // always be able to get register info as well (through this method). const AMDILRegisterInfo &getRegisterInfo() const; - // Return true if the instruction is a register to register move and leave the - // source and dest operands in the passed parameters. - bool isMoveInstr(const MachineInstr &MI, unsigned int &SrcReg, - unsigned int &DstReg, unsigned int &SrcSubIdx, - unsigned int &DstSubIdx) const; - bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const; @@ -62,29 +56,10 @@ public: const MachineMemOperand *&MMO, int &FrameIndex) const; - -#if 0 - void reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; - MachineInstr *duplicate(MachineInstr *Orig, - MachineFunction &MF) const; -#endif MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; -#if 0 - MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const; - bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const; - bool produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1) const; - -#endif bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -99,12 +74,6 @@ public: const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; - bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -141,8 +110,6 @@ public: unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex = 0) const; - bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, - int64_t &Offset1, int64_t &Offset2) const; bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, unsigned NumLoads) const; @@ -151,24 +118,36 @@ public: void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; bool isPredicated(const MachineInstr *MI) const; -#if 0 - bool isUnpredicatedTerminator(const MachineInstr *MI) const; - bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl<MachineOperand> &Pred) const; -#endif bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, const SmallVectorImpl<MachineOperand> &Pred2) const; bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; bool isPredicable(MachineInstr *MI) const; bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; -#if 0 - unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const; - unsigned getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const; -#endif - }; + + // Helper functions that check the opcode for status information + bool isLoadInst(llvm::MachineInstr *MI) const; + bool isExtLoadInst(llvm::MachineInstr *MI) const; + bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; + bool isSExtLoadInst(llvm::MachineInstr *MI) const; + bool isZExtLoadInst(llvm::MachineInstr *MI) const; + bool isAExtLoadInst(llvm::MachineInstr *MI) const; + bool isStoreInst(llvm::MachineInstr *MI) const; + bool isTruncStoreInst(llvm::MachineInstr *MI) const; + bool isAtomicInst(llvm::MachineInstr *MI) const; + bool isVolatileInst(llvm::MachineInstr *MI) const; + bool isGlobalInst(llvm::MachineInstr *MI) const; + bool isPrivateInst(llvm::MachineInstr *MI) const; + bool isConstantInst(llvm::MachineInstr *MI) const; + bool isRegionInst(llvm::MachineInstr *MI) const; + bool isLocalInst(llvm::MachineInstr *MI) const; + bool isImageInst(llvm::MachineInstr *MI) const; + bool isAppendInst(llvm::MachineInstr *MI) const; + bool isRegionAtomic(llvm::MachineInstr *MI) const; + bool isLocalAtomic(llvm::MachineInstr *MI) const; + bool isGlobalAtomic(llvm::MachineInstr *MI) const; + bool isArenaAtomic(llvm::MachineInstr *MI) const; +}; } diff --git a/src/gallium/drivers/radeon/AMDILInstructions.td b/src/gallium/drivers/radeon/AMDILInstructions.td index f824a67d7ad..db56e2121b3 100644 --- a/src/gallium/drivers/radeon/AMDILInstructions.td +++ b/src/gallium/drivers/radeon/AMDILInstructions.td @@ -1,4 +1,4 @@ -//===-- AMDILInstructions.td - TODO: Add brief description -------===// +//===-- AMDILInstructions.td - AMDIL Instruction definitions --------------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp b/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp deleted file mode 100644 index 9366f2e7bcb..00000000000 --- a/src/gallium/drivers/radeon/AMDILMCCodeEmitter.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===---- AMDILMCCodeEmitter.cpp - Convert AMDIL text to AMDIL binary ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -//===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "amdil-emitter" -#include "AMDIL.h" -#include "AMDILInstrInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -#if 0 -namespace { - class AMDILMCCodeEmitter : public MCCodeEmitter { - AMDILMCCodeEmitter(const AMDILMCCodeEmitter &);// DO NOT IMPLEMENT - void operator=(const AMDILMCCodeEmitter &); // DO NOT IMPLEMENT - const TargetMachine &TM; - const TargetInstrInfo &TII; - MCContext &Ctx; - bool Is64BitMode; - public: - AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit); - ~AMDILMCCodeEmitter(); - unsigned getNumFixupKinds() const; - const MCFixupKindInfo& getFixupKindInfo(MCFixupKind Kind) const; - static unsigned GetAMDILRegNum(const MCOperand &MO); - void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const; - void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) const; - void EmitImmediate(const MCOperand &Disp, unsigned ImmSize, - MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &os, - SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const; - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const; - - }; // class AMDILMCCodeEmitter -}; // anonymous namespace - -namespace llvm { - MCCodeEmitter *createAMDILMCCodeEmitter(const Target &, - TargetMachine &TM, MCContext &Ctx) - { - return new AMDILMCCodeEmitter(TM, Ctx, false); - } -} - -AMDILMCCodeEmitter::AMDILMCCodeEmitter(TargetMachine &tm, MCContext &ctx - , bool is64Bit) -: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) -{ - Is64BitMode = is64Bit; -} - -AMDILMCCodeEmitter::~AMDILMCCodeEmitter() -{ -} - -unsigned -AMDILMCCodeEmitter::getNumFixupKinds() const -{ - return 0; -} - -const MCFixupKindInfo & -AMDILMCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const -{ -// const static MCFixupKindInfo Infos[] = {}; - if (Kind < FirstTargetFixupKind) { - return MCCodeEmitter::getFixupKindInfo(Kind); - } - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return MCCodeEmitter::getFixupKindInfo(Kind); - // return Infos[Kind - FirstTargetFixupKind]; - -} - -void -AMDILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte, - raw_ostream &OS) const -{ - OS << (char) C; - ++CurByte; -} -void -AMDILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, - raw_ostream &OS) const -{ - // Output the constant in little endian byte order - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, CurByte, OS); - Val >>= 8; - } -} -void -AMDILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned ImmSize, - MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const -{ - // If this is a simple integer displacement that doesn't require a relocation - // emit it now. - if (DispOp.isImm()) { - EmitConstant(DispOp.getImm() + ImmOffset, ImmSize, CurByte, OS); - } - - // If we have an immoffset, add it to the expression - const MCExpr *Expr = DispOp.getExpr(); - - if (ImmOffset) { - Expr = MCBinaryExpr::CreateAdd(Expr, - MCConstantExpr::Create(ImmOffset, Ctx), Ctx); - } - // Emit a symbolic constant as a fixup and 4 zeros. - Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); - // TODO: Why the 4 zeros? - EmitConstant(0, ImmSize, CurByte, OS); -} - -void -AMDILMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const -{ -#if 0 - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); - unsigned TSFlags = Desc.TSFlags; - - // Keep track of the current byte being emitted. - unsigned CurByte = 0; - - unsigned NumOps = Desc.getNumOperands(); - unsigned CurOp = 0; - - unsigned char BaseOpcode = 0; -#ifndef NDEBUG - // FIXME: Verify. - if (// !Desc.isVariadic() && - CurOp != NumOps) { - errs() << "Cannot encode all operands of: "; - MI.dump(); - errs() << '\n'; - abort(); - } -#endif -#endif -} -#endif diff --git a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp index b8e536361f0..5cb988785e2 100644 --- a/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp +++ b/src/gallium/drivers/radeon/AMDILMachinePeephole.cpp @@ -8,17 +8,11 @@ //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine_peephole" -#if !defined(NDEBUG) -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME (false) -#endif - #include "AMDIL.h" +#include "AMDILInstrInfo.h" #include "AMDILSubtarget.h" -#include "AMDILUtilityFunctions.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" @@ -56,7 +50,7 @@ namespace llvm AMDILMachinePeephole::AMDILMachinePeephole(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) : MachineFunctionPass(ID), TM(tm) { - mDebug = DEBUGME; + mDebug = false; } bool @@ -64,6 +58,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; const AMDILSubtarget *STM = &TM.getSubtarget<AMDILSubtarget>(); + const AMDILInstrInfo * AMDILII = + static_cast<const AMDILInstrInfo *>(TM.getInstrInfo()); for (MachineFunction::iterator MBB = MF.begin(), MBE = MF.end(); MBB != MBE; ++MBB) { MachineBasicBlock *mb = MBB; @@ -74,7 +70,7 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) name = TM.getInstrInfo()->getName(mi->getOpcode()); switch (mi->getOpcode()) { default: - if (isAtomicInst(TM.getInstrInfo(), mi)) { + if (AMDILII->isAtomicInst(mi)) { // If we don't support the hardware accellerated address spaces, // then the atomic needs to be transformed to the global atomic. if (strstr(name, "_L_") @@ -94,7 +90,8 @@ AMDILMachinePeephole::runOnMachineFunction(MachineFunction &MF) TM.getInstrInfo()->get( (mi->getOpcode() - AMDIL::ATOM_R_ADD) + AMDIL::ATOM_G_ADD)); } - } else if ((isLoadInst(TM.getInstrInfo(), mi) || isStoreInst(TM.getInstrInfo(), mi)) && isVolatileInst(TM.getInstrInfo(), mi)) { + } else if ((AMDILII->isLoadInst(mi) || AMDILII->isStoreInst(mi)) + && AMDILII->isVolatileInst(mi)) { insertFence(MIB); } continue; diff --git a/src/gallium/drivers/radeon/AMDILMultiClass.td b/src/gallium/drivers/radeon/AMDILMultiClass.td index 92691db52fd..d6828178ba7 100644 --- a/src/gallium/drivers/radeon/AMDILMultiClass.td +++ b/src/gallium/drivers/radeon/AMDILMultiClass.td @@ -1,4 +1,4 @@ -//===-- AMDILMultiClass.td - TODO: Add brief description -------===// +//===-- AMDILMultiClass.td - AMDIL Multiclass defs ---*- tablegen -*-------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILNIDevice.cpp b/src/gallium/drivers/radeon/AMDILNIDevice.cpp index 8fda1c18ae5..d4112cda0b5 100644 --- a/src/gallium/drivers/radeon/AMDILNIDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILNIDevice.cpp @@ -1,4 +1,4 @@ -//===-- AMDILNIDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp index 5fe9f53c8c8..b62c7ab048b 100644 --- a/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp +++ b/src/gallium/drivers/radeon/AMDILPeepholeOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- AMDILPeepholeOptimizer.cpp - TODO: Add brief description -------===// +//===-- AMDILPeepholeOptimizer.cpp - AMDIL Peephole optimizations ---------===// // // The LLVM Compiler Infrastructure // @@ -7,20 +7,14 @@ // //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "PeepholeOpt" -#ifdef DEBUG -#define DEBUGME (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) -#else -#define DEBUGME 0 -#endif - #include "AMDILAlgorithms.tpp" #include "AMDILDevices.h" -#include "AMDILUtilityFunctions.h" +#include "AMDILInstrInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Constants.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/Function.h" @@ -41,6 +35,9 @@ using namespace llvm; // The Peephole optimization pass is used to do simple last minute optimizations // that are required for correct code or to remove redundant functions namespace { + +class OpaqueType; + class LLVM_LIBRARY_VISIBILITY AMDILPeepholeOpt : public FunctionPass { public: TargetMachine &TM; @@ -114,6 +111,19 @@ private: // samplers at compile time. bool propagateSamplerInst(CallInst *CI); + // Helper functions + + // Group of functions that recursively calculate the size of a structure based + // on it's sub-types. + size_t getTypeSize(Type * const T, bool dereferencePtr = false); + size_t getTypeSize(StructType * const ST, bool dereferencePtr = false); + size_t getTypeSize(IntegerType * const IT, bool dereferencePtr = false); + size_t getTypeSize(FunctionType * const FT,bool dereferencePtr = false); + size_t getTypeSize(ArrayType * const AT, bool dereferencePtr = false); + size_t getTypeSize(VectorType * const VT, bool dereferencePtr = false); + size_t getTypeSize(PointerType * const PT, bool dereferencePtr = false); + size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr = false); + LLVMContext *mCTX; Function *mF; const AMDILSubtarget *mSTM; @@ -134,7 +144,7 @@ namespace llvm { AMDILPeepholeOpt::AMDILPeepholeOpt(TargetMachine &tm AMDIL_OPT_LEVEL_DECL) : FunctionPass(ID), TM(tm) { - mDebug = DEBUGME; + mDebug = false; optLevel = TM.getOptLevel(); } @@ -1136,3 +1146,106 @@ AMDILPeepholeOpt::getAnalysisUsage(AnalysisUsage &AU) const FunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); } + +size_t AMDILPeepholeOpt::getTypeSize(Type * const T, bool dereferencePtr) { + size_t size = 0; + if (!T) { + return size; + } + switch (T->getTypeID()) { + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + assert(0 && "These types are not supported by this backend"); + default: + case Type::FloatTyID: + case Type::DoubleTyID: + size = T->getPrimitiveSizeInBits() >> 3; + break; + case Type::PointerTyID: + size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); + break; + case Type::IntegerTyID: + size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); + break; + case Type::StructTyID: + size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); + break; + case Type::ArrayTyID: + size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); + break; + case Type::FunctionTyID: + size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); + break; + case Type::VectorTyID: + size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); + break; + }; + return size; +} + +size_t AMDILPeepholeOpt::getTypeSize(StructType * const ST, + bool dereferencePtr) { + size_t size = 0; + if (!ST) { + return size; + } + Type *curType; + StructType::element_iterator eib; + StructType::element_iterator eie; + for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { + curType = *eib; + size += getTypeSize(curType, dereferencePtr); + } + return size; +} + +size_t AMDILPeepholeOpt::getTypeSize(IntegerType * const IT, + bool dereferencePtr) { + return IT ? (IT->getBitWidth() >> 3) : 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(FunctionType * const FT, + bool dereferencePtr) { + assert(0 && "Should not be able to calculate the size of an function type"); + return 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(ArrayType * const AT, + bool dereferencePtr) { + return (size_t)(AT ? (getTypeSize(AT->getElementType(), + dereferencePtr) * AT->getNumElements()) + : 0); +} + +size_t AMDILPeepholeOpt::getTypeSize(VectorType * const VT, + bool dereferencePtr) { + return VT ? (VT->getBitWidth() >> 3) : 0; +} + +size_t AMDILPeepholeOpt::getTypeSize(PointerType * const PT, + bool dereferencePtr) { + if (!PT) { + return 0; + } + Type *CT = PT->getElementType(); + if (CT->getTypeID() == Type::StructTyID && + PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { + return getTypeSize(dyn_cast<StructType>(CT)); + } else if (dereferencePtr) { + size_t size = 0; + for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { + size += getTypeSize(PT->getContainedType(x), dereferencePtr); + } + return size; + } else { + return 4; + } +} + +size_t AMDILPeepholeOpt::getTypeSize(OpaqueType * const OT, + bool dereferencePtr) { + //assert(0 && "Should not be able to calculate the size of an opaque type"); + return 4; +} diff --git a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp index 5588233378c..d7c1dc74b8b 100644 --- a/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp +++ b/src/gallium/drivers/radeon/AMDILRegisterInfo.cpp @@ -20,7 +20,8 @@ #include "AMDILRegisterInfo.h" #include "AMDIL.h" -#include "AMDILUtilityFunctions.h" +#include "AMDILInstrInfo.h" +#include "AMDILTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -109,7 +110,9 @@ AMDILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (!MI.getOperand(x).isFI()) { continue; } - bool def = isStoreInst(TM.getInstrInfo(), &MI); + const AMDILInstrInfo * AMDILII = + static_cast<const AMDILInstrInfo *>(TM.getInstrInfo()); + bool def = AMDILII->isStoreInst(&MI); int FrameIndex = MI.getOperand(x).getIndex(); int64_t Offset = MFI->getObjectOffset(FrameIndex); //int64_t Size = MF.getFrameInfo()->getObjectSize(FrameIndex); diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.cpp b/src/gallium/drivers/radeon/AMDILSIDevice.cpp index ce560984ef9..ae402a5d1f7 100644 --- a/src/gallium/drivers/radeon/AMDILSIDevice.cpp +++ b/src/gallium/drivers/radeon/AMDILSIDevice.cpp @@ -1,49 +1,49 @@ -//===-- AMDILSIDevice.cpp - TODO: Add brief description -------===// +//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//==-----------------------------------------------------------------------===//
-#include "AMDILSIDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDILNIDevice.h"
-#include "AMDILSubtarget.h"
+//==-----------------------------------------------------------------------===// +#include "AMDILSIDevice.h" +#include "AMDILEvergreenDevice.h" +#include "AMDILNIDevice.h" +#include "AMDILSubtarget.h" -using namespace llvm;
-
-AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST)
- : AMDILEvergreenDevice(ST)
-{
-}
-AMDILSIDevice::~AMDILSIDevice()
-{
-}
-
-size_t
-AMDILSIDevice::getMaxLDSSize() const
-{
- if (usesHardware(AMDILDeviceInfo::LocalMem)) {
- return MAX_LDS_SIZE_900;
- } else {
- return 0;
- }
-}
-
-uint32_t
-AMDILSIDevice::getGeneration() const
-{
- return AMDILDeviceInfo::HD7XXX;
-}
-
-std::string
-AMDILSIDevice::getDataLayout() const
-{
- return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
-}
+using namespace llvm; + +AMDILSIDevice::AMDILSIDevice(AMDILSubtarget *ST) + : AMDILEvergreenDevice(ST) +{ +} +AMDILSIDevice::~AMDILSIDevice() +{ +} + +size_t +AMDILSIDevice::getMaxLDSSize() const +{ + if (usesHardware(AMDILDeviceInfo::LocalMem)) { + return MAX_LDS_SIZE_900; + } else { + return 0; + } +} + +uint32_t +AMDILSIDevice::getGeneration() const +{ + return AMDILDeviceInfo::HD7XXX; +} + +std::string +AMDILSIDevice::getDataLayout() const +{ + return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16" + "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32" + "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64" + "-v96:128:128-v128:128:128-v192:256:256-v256:256:256" + "-v512:512:512-v1024:1024:1024-v2048:2048:2048" + "-n8:16:32:64"); +} diff --git a/src/gallium/drivers/radeon/AMDILSIDevice.h b/src/gallium/drivers/radeon/AMDILSIDevice.h index 69f35a0588d..b272af7cfcf 100644 --- a/src/gallium/drivers/radeon/AMDILSIDevice.h +++ b/src/gallium/drivers/radeon/AMDILSIDevice.h @@ -1,45 +1,45 @@ -//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
+//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===---------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
+//==-----------------------------------------------------------------------===// +// +// Interface for the subtarget data classes. +// +//===---------------------------------------------------------------------===// +// This file will define the interface that each generation needs to +// implement in order to correctly answer queries on the capabilities of the // specific hardware.
-//===---------------------------------------------------------------------===//
-#ifndef _AMDILSIDEVICE_H_
-#define _AMDILSIDEVICE_H_
-#include "AMDILEvergreenDevice.h"
-#include "AMDILSubtarget.h"
+//===---------------------------------------------------------------------===// +#ifndef _AMDILSIDEVICE_H_ +#define _AMDILSIDEVICE_H_ +#include "AMDILEvergreenDevice.h" +#include "AMDILSubtarget.h" + +namespace llvm { + class AMDILSubtarget; +//===---------------------------------------------------------------------===// +// SI generation of devices and their respective sub classes +//===---------------------------------------------------------------------===// + +// The AMDILSIDevice is the base class for all Northern Island series of +// cards. It is very similiar to the AMDILEvergreenDevice, with the major +// exception being differences in wavefront size and hardware capabilities. The +// SI devices are all 64 wide wavefronts and also add support for signed 24 bit +// integer operations + + class AMDILSIDevice : public AMDILEvergreenDevice { + public: + AMDILSIDevice(AMDILSubtarget*); + virtual ~AMDILSIDevice(); + virtual size_t getMaxLDSSize() const; + virtual uint32_t getGeneration() const; + virtual std::string getDataLayout() const; + protected: + }; // AMDILSIDevice -namespace llvm {
- class AMDILSubtarget;
-//===---------------------------------------------------------------------===//
-// SI generation of devices and their respective sub classes
-//===---------------------------------------------------------------------===//
-
-// The AMDILSIDevice is the base class for all Northern Island series of
-// cards. It is very similiar to the AMDILEvergreenDevice, with the major
-// exception being differences in wavefront size and hardware capabilities. The
-// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
-// integer operations
-
- class AMDILSIDevice : public AMDILEvergreenDevice {
- public:
- AMDILSIDevice(AMDILSubtarget*);
- virtual ~AMDILSIDevice();
- virtual size_t getMaxLDSSize() const;
- virtual uint32_t getGeneration() const;
- virtual std::string getDataLayout() const;
- protected:
- }; // AMDILSIDevice
-
-} // namespace llvm
-#endif // _AMDILSIDEVICE_H_
+} // namespace llvm +#endif // _AMDILSIDEVICE_H_ diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.cpp b/src/gallium/drivers/radeon/AMDILSubtarget.cpp index 11b6bbe0c01..249cb03f4a3 100644 --- a/src/gallium/drivers/radeon/AMDILSubtarget.cpp +++ b/src/gallium/drivers/radeon/AMDILSubtarget.cpp @@ -27,7 +27,8 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "AMDILGenSubtargetInfo.inc" -AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ) +AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS) : AMDILGenSubtargetInfo( TT, CPU, FS ), + mDumpCode(false) { memset(CapsOverride, 0, sizeof(*CapsOverride) * AMDILDeviceInfo::MaxNumberCapabilities); @@ -93,7 +94,7 @@ AMDILSubtarget::AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::St } #endif mDevName = GPU; - mDevice = getDeviceFromName(mDevName, this, mIs64bit); + mDevice = AMDILDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit); } AMDILSubtarget::~AMDILSubtarget() { diff --git a/src/gallium/drivers/radeon/AMDILSubtarget.h b/src/gallium/drivers/radeon/AMDILSubtarget.h index a4b0e34ada7..38fcb859ac6 100644 --- a/src/gallium/drivers/radeon/AMDILSubtarget.h +++ b/src/gallium/drivers/radeon/AMDILSubtarget.h @@ -42,6 +42,7 @@ namespace llvm { uint32_t mVersion; bool mIs64bit; bool mIs32on64bit; + bool mDumpCode; public: AMDILSubtarget(llvm::StringRef TT, llvm::StringRef CPU, llvm::StringRef FS); virtual ~AMDILSubtarget(); @@ -67,6 +68,7 @@ namespace llvm { ParseSubtargetFeatures( llvm::StringRef CPU, llvm::StringRef FS); + bool dumpCode() const { return mDumpCode; } }; diff --git a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp index 77fac1d97bd..0879d43ad72 100644 --- a/src/gallium/drivers/radeon/AMDILTargetMachine.cpp +++ b/src/gallium/drivers/radeon/AMDILTargetMachine.cpp @@ -150,8 +150,8 @@ bool AMDILPassConfig::addPreISel() bool AMDILPassConfig::addInstSelector() { - PM.add(createAMDILPeepholeOpt(*TM)); - PM.add(createAMDILISelDag(getAMDILTargetMachine())); + PM->add(createAMDILPeepholeOpt(*TM)); + PM->add(createAMDILISelDag(getAMDILTargetMachine())); return false; } @@ -162,7 +162,7 @@ bool AMDILPassConfig::addPreRegAlloc() llvm::RegisterScheduler::setDefault(&llvm::createSourceListDAGScheduler); } - PM.add(createAMDILMachinePeephole(*TM)); + PM->add(createAMDILMachinePeephole(*TM)); return false; } @@ -175,8 +175,8 @@ bool AMDILPassConfig::addPostRegAlloc() { /// true if -print-machineinstrs should print out the code after the passes. bool AMDILPassConfig::addPreEmitPass() { - PM.add(createAMDILCFGPreparationPass(*TM)); - PM.add(createAMDILCFGStructurizerPass(*TM)); + PM->add(createAMDILCFGPreparationPass(*TM)); + PM->add(createAMDILCFGStructurizerPass(*TM)); return true; } diff --git a/src/gallium/drivers/radeon/AMDILTokenDesc.td b/src/gallium/drivers/radeon/AMDILTokenDesc.td index b81f593506f..2dafb2cd559 100644 --- a/src/gallium/drivers/radeon/AMDILTokenDesc.td +++ b/src/gallium/drivers/radeon/AMDILTokenDesc.td @@ -1,4 +1,4 @@ -//===-- AMDILTokenDesc.td - TODO: Add brief description -------===// +//===-- AMDILTokenDesc.td - AMDIL Token Definitions --*- tablegen -*-----===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp b/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp deleted file mode 100644 index f2ef4eb7771..00000000000 --- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.cpp +++ /dev/null @@ -1,683 +0,0 @@ -//===-- AMDILUtilityFunctions.cpp - AMDIL Utility Functions ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file provides the implementations of functions that are declared in the -// AMDILUtilityFUnctions.h file. -// -//===----------------------------------------------------------------------===// -#include "AMDILUtilityFunctions.h" -#include "AMDILISelLowering.h" -#include "llvm/ADT/ValueMap.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instruction.h" -#include "llvm/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Type.h" - -#include <cstdio> -#include <list> -#include <queue> - -#define GET_OPCODE_NAME(TII, MI) \ - TII->getName(MI->getOpcode()) - - -using namespace llvm; -int64_t GET_SCALAR_SIZE(llvm::Type *A) { - return A->getScalarSizeInBits(); -} - -const TargetRegisterClass * getRegClassFromID(unsigned int ID) { - switch (ID) { - default: - assert(0 && "Passed in ID does not match any register classes."); - return NULL; - case AMDIL::GPRI8RegClassID: - return &AMDIL::GPRI8RegClass; - case AMDIL::GPRI16RegClassID: - return &AMDIL::GPRI16RegClass; - case AMDIL::GPRI32RegClassID: - return &AMDIL::GPRI32RegClass; - case AMDIL::GPRF32RegClassID: - return &AMDIL::GPRF32RegClass; - case AMDIL::GPRI64RegClassID: - return &AMDIL::GPRI64RegClass; - case AMDIL::GPRF64RegClassID: - return &AMDIL::GPRF64RegClass; - case AMDIL::GPRV4F32RegClassID: - return &AMDIL::GPRV4F32RegClass; - case AMDIL::GPRV4I8RegClassID: - return &AMDIL::GPRV4I8RegClass; - case AMDIL::GPRV4I16RegClassID: - return &AMDIL::GPRV4I16RegClass; - case AMDIL::GPRV4I32RegClassID: - return &AMDIL::GPRV4I32RegClass; - case AMDIL::GPRV2F32RegClassID: - return &AMDIL::GPRV2F32RegClass; - case AMDIL::GPRV2I8RegClassID: - return &AMDIL::GPRV2I8RegClass; - case AMDIL::GPRV2I16RegClassID: - return &AMDIL::GPRV2I16RegClass; - case AMDIL::GPRV2I32RegClassID: - return &AMDIL::GPRV2I32RegClass; - case AMDIL::GPRV2F64RegClassID: - return &AMDIL::GPRV2F64RegClass; - case AMDIL::GPRV2I64RegClassID: - return &AMDIL::GPRV2I64RegClass; - }; -} - -unsigned int getMoveInstFromID(unsigned int ID) { - switch (ID) { - default: - assert(0 && "Passed in ID does not match any move instructions."); - case AMDIL::GPRI8RegClassID: - return AMDIL::MOVE_i8; - case AMDIL::GPRI16RegClassID: - return AMDIL::MOVE_i16; - case AMDIL::GPRI32RegClassID: - return AMDIL::MOVE_i32; - case AMDIL::GPRF32RegClassID: - return AMDIL::MOVE_f32; - case AMDIL::GPRI64RegClassID: - return AMDIL::MOVE_i64; - case AMDIL::GPRF64RegClassID: - return AMDIL::MOVE_f64; - case AMDIL::GPRV4F32RegClassID: - return AMDIL::MOVE_v4f32; - case AMDIL::GPRV4I8RegClassID: - return AMDIL::MOVE_v4i8; - case AMDIL::GPRV4I16RegClassID: - return AMDIL::MOVE_v4i16; - case AMDIL::GPRV4I32RegClassID: - return AMDIL::MOVE_v4i32; - case AMDIL::GPRV2F32RegClassID: - return AMDIL::MOVE_v2f32; - case AMDIL::GPRV2I8RegClassID: - return AMDIL::MOVE_v2i8; - case AMDIL::GPRV2I16RegClassID: - return AMDIL::MOVE_v2i16; - case AMDIL::GPRV2I32RegClassID: - return AMDIL::MOVE_v2i32; - case AMDIL::GPRV2F64RegClassID: - return AMDIL::MOVE_v2f64; - case AMDIL::GPRV2I64RegClassID: - return AMDIL::MOVE_v2i64; - }; - return -1; -} - -unsigned int getPHIMoveInstFromID(unsigned int ID) { - switch (ID) { - default: - assert(0 && "Passed in ID does not match any move instructions."); - case AMDIL::GPRI8RegClassID: - return AMDIL::PHIMOVE_i8; - case AMDIL::GPRI16RegClassID: - return AMDIL::PHIMOVE_i16; - case AMDIL::GPRI32RegClassID: - return AMDIL::PHIMOVE_i32; - case AMDIL::GPRF32RegClassID: - return AMDIL::PHIMOVE_f32; - case AMDIL::GPRI64RegClassID: - return AMDIL::PHIMOVE_i64; - case AMDIL::GPRF64RegClassID: - return AMDIL::PHIMOVE_f64; - case AMDIL::GPRV4F32RegClassID: - return AMDIL::PHIMOVE_v4f32; - case AMDIL::GPRV4I8RegClassID: - return AMDIL::PHIMOVE_v4i8; - case AMDIL::GPRV4I16RegClassID: - return AMDIL::PHIMOVE_v4i16; - case AMDIL::GPRV4I32RegClassID: - return AMDIL::PHIMOVE_v4i32; - case AMDIL::GPRV2F32RegClassID: - return AMDIL::PHIMOVE_v2f32; - case AMDIL::GPRV2I8RegClassID: - return AMDIL::PHIMOVE_v2i8; - case AMDIL::GPRV2I16RegClassID: - return AMDIL::PHIMOVE_v2i16; - case AMDIL::GPRV2I32RegClassID: - return AMDIL::PHIMOVE_v2i32; - case AMDIL::GPRV2F64RegClassID: - return AMDIL::PHIMOVE_v2f64; - case AMDIL::GPRV2I64RegClassID: - return AMDIL::PHIMOVE_v2i64; - }; - return -1; -} - -const TargetRegisterClass* getRegClassFromType(unsigned int type) { - switch (type) { - default: - assert(0 && "Passed in type does not match any register classes."); - case MVT::i8: - return &AMDIL::GPRI8RegClass; - case MVT::i16: - return &AMDIL::GPRI16RegClass; - case MVT::i32: - return &AMDIL::GPRI32RegClass; - case MVT::f32: - return &AMDIL::GPRF32RegClass; - case MVT::i64: - return &AMDIL::GPRI64RegClass; - case MVT::f64: - return &AMDIL::GPRF64RegClass; - case MVT::v4f32: - return &AMDIL::GPRV4F32RegClass; - case MVT::v4i8: - return &AMDIL::GPRV4I8RegClass; - case MVT::v4i16: - return &AMDIL::GPRV4I16RegClass; - case MVT::v4i32: - return &AMDIL::GPRV4I32RegClass; - case MVT::v2f32: - return &AMDIL::GPRV2F32RegClass; - case MVT::v2i8: - return &AMDIL::GPRV2I8RegClass; - case MVT::v2i16: - return &AMDIL::GPRV2I16RegClass; - case MVT::v2i32: - return &AMDIL::GPRV2I32RegClass; - case MVT::v2f64: - return &AMDIL::GPRV2F64RegClass; - case MVT::v2i64: - return &AMDIL::GPRV2I64RegClass; - } -} - -void printSDNode(const SDNode *N) { - printf("Opcode: %d isTargetOpcode: %d isMachineOpcode: %d\n", - N->getOpcode(), N->isTargetOpcode(), N->isMachineOpcode()); - printf("Empty: %d OneUse: %d Size: %d NodeID: %d\n", - N->use_empty(), N->hasOneUse(), (int)N->use_size(), N->getNodeId()); - for (unsigned int i = 0; i < N->getNumOperands(); ++i) { - printf("OperandNum: %d ValueCount: %d ValueType: %d\n", - i, N->getNumValues(), N->getValueType(0) .getSimpleVT().SimpleTy); - printSDValue(N->getOperand(i), 0); - } -} - -void printSDValue(const SDValue &Op, int level) { - printf("\nOp: %p OpCode: %d NumOperands: %d ", (void*)&Op, Op.getOpcode(), - Op.getNumOperands()); - printf("IsTarget: %d IsMachine: %d ", Op.isTargetOpcode(), - Op.isMachineOpcode()); - if (Op.isMachineOpcode()) { - printf("MachineOpcode: %d\n", Op.getMachineOpcode()); - } else { - printf("\n"); - } - EVT vt = Op.getValueType(); - printf("ValueType: %d \n", vt.getSimpleVT().SimpleTy); - printf("UseEmpty: %d OneUse: %d\n", Op.use_empty(), Op.hasOneUse()); - if (level) { - printf("Children for %d:\n", level); - for (unsigned int i = 0; i < Op.getNumOperands(); ++i) { - printf("Child %d->%d:", level, i); - printSDValue(Op.getOperand(i), level - 1); - } - } -} - -bool isPHIMove(unsigned int opcode) { - switch (opcode) { - default: - return false; - ExpandCaseToAllTypes(AMDIL::PHIMOVE); - return true; - } - return false; -} - -bool isMove(unsigned int opcode) { - switch (opcode) { - default: - return false; - ExpandCaseToAllTypes(AMDIL::MOVE); - return true; - } - return false; -} - -bool isMoveOrEquivalent(unsigned int opcode) { - switch (opcode) { - default: - return isMove(opcode) || isPHIMove(opcode); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASCHAR); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASSHORT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASINT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASLONG); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASDOUBLE); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASFLOAT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2CHAR); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2SHORT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2INT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2FLOAT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2LONG); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV2DOUBLE); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4CHAR); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4SHORT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4INT); - ExpandCaseToAllScalarTypes(AMDIL::IL_ASV4FLOAT); - case AMDIL::INTTOANY_i8: - case AMDIL::INTTOANY_i16: - case AMDIL::INTTOANY_i32: - case AMDIL::INTTOANY_f32: - case AMDIL::DLO: - case AMDIL::LLO: - case AMDIL::LLO_v2i64: - return true; - }; - return false; -} - -bool check_type(const Value *ptr, unsigned int addrspace) { - if (!ptr) { - return false; - } - Type *ptrType = ptr->getType(); - return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace; -} - -size_t getTypeSize(Type * const T, bool dereferencePtr) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = T->getPrimitiveSizeInBits() >> 3; - break; - case Type::PointerTyID: - size = getTypeSize(dyn_cast<PointerType>(T), dereferencePtr); - break; - case Type::IntegerTyID: - size = getTypeSize(dyn_cast<IntegerType>(T), dereferencePtr); - break; - case Type::StructTyID: - size = getTypeSize(dyn_cast<StructType>(T), dereferencePtr); - break; - case Type::ArrayTyID: - size = getTypeSize(dyn_cast<ArrayType>(T), dereferencePtr); - break; - case Type::FunctionTyID: - size = getTypeSize(dyn_cast<FunctionType>(T), dereferencePtr); - break; - case Type::VectorTyID: - size = getTypeSize(dyn_cast<VectorType>(T), dereferencePtr); - break; - }; - return size; -} - -size_t getTypeSize(StructType * const ST, bool dereferencePtr) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); eib != eie; ++eib) { - curType = *eib; - size += getTypeSize(curType, dereferencePtr); - } - return size; -} - -size_t getTypeSize(IntegerType * const IT, bool dereferencePtr) { - return IT ? (IT->getBitWidth() >> 3) : 0; -} - -size_t getTypeSize(FunctionType * const FT, bool dereferencePtr) { - assert(0 && "Should not be able to calculate the size of an function type"); - return 0; -} - -size_t getTypeSize(ArrayType * const AT, bool dereferencePtr) { - return (size_t)(AT ? (getTypeSize(AT->getElementType(), - dereferencePtr) * AT->getNumElements()) - : 0); -} - -size_t getTypeSize(VectorType * const VT, bool dereferencePtr) { - return VT ? (VT->getBitWidth() >> 3) : 0; -} - -size_t getTypeSize(PointerType * const PT, bool dereferencePtr) { - if (!PT) { - return 0; - } - Type *CT = PT->getElementType(); - if (CT->getTypeID() == Type::StructTyID && - PT->getAddressSpace() == AMDILAS::PRIVATE_ADDRESS) { - return getTypeSize(dyn_cast<StructType>(CT)); - } else if (dereferencePtr) { - size_t size = 0; - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getTypeSize(PT->getContainedType(x), dereferencePtr); - } - return size; - } else { - return 4; - } -} - -size_t getTypeSize(OpaqueType * const OT, bool dereferencePtr) { - //assert(0 && "Should not be able to calculate the size of an opaque type"); - return 4; -} - -size_t getNumElements(Type * const T) { - size_t size = 0; - if (!T) { - return size; - } - switch (T->getTypeID()) { - case Type::X86_FP80TyID: - case Type::FP128TyID: - case Type::PPC_FP128TyID: - case Type::LabelTyID: - assert(0 && "These types are not supported by this backend"); - default: - case Type::FloatTyID: - case Type::DoubleTyID: - size = 1; - break; - case Type::PointerTyID: - size = getNumElements(dyn_cast<PointerType>(T)); - break; - case Type::IntegerTyID: - size = getNumElements(dyn_cast<IntegerType>(T)); - break; - case Type::StructTyID: - size = getNumElements(dyn_cast<StructType>(T)); - break; - case Type::ArrayTyID: - size = getNumElements(dyn_cast<ArrayType>(T)); - break; - case Type::FunctionTyID: - size = getNumElements(dyn_cast<FunctionType>(T)); - break; - case Type::VectorTyID: - size = getNumElements(dyn_cast<VectorType>(T)); - break; - }; - return size; -} - -size_t getNumElements(StructType * const ST) { - size_t size = 0; - if (!ST) { - return size; - } - Type *curType; - StructType::element_iterator eib; - StructType::element_iterator eie; - for (eib = ST->element_begin(), eie = ST->element_end(); - eib != eie; ++eib) { - curType = *eib; - size += getNumElements(curType); - } - return size; -} - -size_t getNumElements(IntegerType * const IT) { - return (!IT) ? 0 : 1; -} - -size_t getNumElements(FunctionType * const FT) { - assert(0 && "Should not be able to calculate the number of " - "elements of a function type"); - return 0; -} - -size_t getNumElements(ArrayType * const AT) { - return (!AT) ? 0 - : (size_t)(getNumElements(AT->getElementType()) * - AT->getNumElements()); -} - -size_t getNumElements(VectorType * const VT) { - return (!VT) ? 0 - : VT->getNumElements() * getNumElements(VT->getElementType()); -} - -size_t getNumElements(PointerType * const PT) { - size_t size = 0; - if (!PT) { - return size; - } - for (size_t x = 0, y = PT->getNumContainedTypes(); x < y; ++x) { - size += getNumElements(PT->getContainedType(x)); - } - return size; -} - -const llvm::Value *getBasePointerValue(const llvm::Value *V) -{ - if (!V) { - return NULL; - } - const Value *ret = NULL; - ValueMap<const Value *, bool> ValueBitMap; - std::queue<const Value *, std::list<const Value *> > ValueQueue; - ValueQueue.push(V); - while (!ValueQueue.empty()) { - V = ValueQueue.front(); - if (ValueBitMap.find(V) == ValueBitMap.end()) { - ValueBitMap[V] = true; - if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) { - ret = V; - break; - } else if (dyn_cast<GlobalVariable>(V)) { - ret = V; - break; - } else if (dyn_cast<Constant>(V)) { - const ConstantExpr *CE = dyn_cast<ConstantExpr>(V); - if (CE) { - ValueQueue.push(CE->getOperand(0)); - } - } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { - ret = AI; - break; - } else if (const Instruction *I = dyn_cast<Instruction>(V)) { - uint32_t numOps = I->getNumOperands(); - for (uint32_t x = 0; x < numOps; ++x) { - ValueQueue.push(I->getOperand(x)); - } - } else { - // assert(0 && "Found a Value that we didn't know how to handle!"); - } - } - ValueQueue.pop(); - } - return ret; -} - -const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI) { - const Value *moVal = NULL; - if (!MI->memoperands_empty()) { - const MachineMemOperand *memOp = (*MI->memoperands_begin()); - moVal = memOp ? memOp->getValue() : NULL; - moVal = getBasePointerValue(moVal); - } - return moVal; -} - -bool commaPrint(int i, llvm::raw_ostream &O) { - O << ":" << i; - return false; -} - -bool isLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - if (strstr(GET_OPCODE_NAME(TII, MI), "LOADCONST")) { - return false; - } - return strstr(GET_OPCODE_NAME(TII, MI), "LOAD"); -} - -bool isSWSExtLoadInst(MachineInstr *MI) -{ -switch (MI->getOpcode()) { - default: - break; - ExpandCaseToByteShortTypes(AMDIL::LOCALLOAD); - ExpandCaseToByteShortTypes(AMDIL::GLOBALLOAD); - ExpandCaseToByteShortTypes(AMDIL::REGIONLOAD); - ExpandCaseToByteShortTypes(AMDIL::PRIVATELOAD); - ExpandCaseToByteShortTypes(AMDIL::CPOOLLOAD); - ExpandCaseToByteShortTypes(AMDIL::CONSTANTLOAD); - return true; - }; - return false; -} - -bool isExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "EXTLOAD"); -} - -bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "SEXTLOAD"); -} - -bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "AEXTLOAD"); -} - -bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "ZEXTLOAD"); -} - -bool isStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "STORE"); -} - -bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "TRUNCSTORE"); -} - -bool isAtomicInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM"); -} - -bool isVolatileInst(const llvm::TargetInstrInfo * TII, MachineInstr *MI) { - if (!MI->memoperands_empty()) { - for (MachineInstr::mmo_iterator mob = MI->memoperands_begin(), - moe = MI->memoperands_end(); mob != moe; ++mob) { - // If there is a volatile mem operand, this is a volatile instruction. - if ((*mob)->isVolatile()) { - return true; - } - } - } - return false; -} -bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "GLOBAL"); -} -bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "PRIVATE"); -} -bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "CONSTANT") - || strstr(GET_OPCODE_NAME(TII, MI), "CPOOL"); -} -bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "REGION"); -} -bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "LOCAL"); -} -bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "IMAGE"); -} -bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "APPEND"); -} -bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_R"); -} -bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_L"); -} -bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_G") - || isArenaAtomic(TII, MI); -} -bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI) -{ - return strstr(GET_OPCODE_NAME(TII, MI), "ATOM_A"); -} - -const char* getSrcSwizzle(unsigned idx) { - const char *srcSwizzles[] = { - "", ".x000", ".0x00", ".00x0", ".000x", ".y000", ".0y00", ".00y0", ".000y", - ".z000", ".0z00", ".00z0", ".000z", ".w000", ".0w00", ".00w0", ".000w", - ".xy00", ".00xy", ".zw00", ".00zw", ".xyz0", ".0xyz", ".xyzw", ".0000", - ".xxxx", ".yyyy", ".zzzz", ".wwww", ".xyxy", ".zwzw", ".xzxz", ".ywyw", - ".x0y0", ".0x0y", ".xy_neg(y)", "_neg(yw)", "_neg(x)", ".xy_neg(xy)", - "_neg(xyzw)", ".0yzw", ".x0zw", ".xy0w", ".x", ".y", ".z", ".w", ".xy", - ".zw" - }; - assert(idx < sizeof(srcSwizzles)/sizeof(srcSwizzles[0]) - && "Idx passed in is invalid!"); - return srcSwizzles[idx]; -} -const char* getDstSwizzle(unsigned idx) { - const char *dstSwizzles[] = { - "", ".x___", ".xy__", ".xyz_", ".xyzw", "._y__", "._yz_", "._yzw", ".__z_", - ".__zw", ".___w", ".x_zw", ".xy_w", ".x_z_", ".x__w", "._y_w", - }; - assert(idx < sizeof(dstSwizzles)/sizeof(dstSwizzles[0]) - && "Idx passed in is invalid!"); - return dstSwizzles[idx]; -} -/// Helper function to get the currently set flags -void getAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes) -{ - // We need 16 bits of information, but LLVMr127097 cut the field in half. - // So we have to use two different fields to store all of our information. - uint16_t upper = MI->getFlags() << 8; - uint16_t lower = MI->getAsmPrinterFlags(); - curRes.u16all = upper | lower; -} -/// Helper function to clear the currently set flags and add the new flags. -void setAsmPrinterFlags(MachineInstr *MI, AMDILAS::InstrResEnc &curRes) -{ - // We need 16 bits of information, but LLVMr127097 cut the field in half. - // So we have to use two different fields to store all of our information. - MI->clearAsmPrinterFlags(); - MI->setFlags(0); - uint8_t lower = curRes.u16all & 0xFF; - uint8_t upper = (curRes.u16all >> 8) & 0xFF; - MI->setFlags(upper); - MI->setAsmPrinterFlag((llvm::MachineInstr::CommentFlag)lower); -} diff --git a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h index 637c868b55c..66af706bbb3 100644 --- a/src/gallium/drivers/radeon/AMDILUtilityFunctions.h +++ b/src/gallium/drivers/radeon/AMDILUtilityFunctions.h @@ -7,191 +7,12 @@ // //==-----------------------------------------------------------------------===// // -// This file provides declarations for functions that are used across different -// classes and provide various conversions or utility to shorten the code +// This file provides helper macros for expanding case statements. // //===----------------------------------------------------------------------===// #ifndef AMDILUTILITYFUNCTIONS_H_ #define AMDILUTILITYFUNCTIONS_H_ -#include "AMDIL.h" -#include "AMDILTargetMachine.h" -#include "llvm/ADT/SmallVector.h" - -// Utility functions from ID -// -namespace llvm { -class TargetRegisterClass; -class SDValue; -class SDNode; -class Value; -class Type; -class StructType; -class IntegerType; -class FunctionType; -class VectorType; -class ArrayType; -class PointerType; -class OpaqueType; -class MachineInstr; - -} -enum SrcSwizzles { - AMDIL_SRC_SWIZZLE_DEFAULT = 0, - AMDIL_SRC_SWIZZLE_X000, - AMDIL_SRC_SWIZZLE_0X00, - AMDIL_SRC_SWIZZLE_00X0, - AMDIL_SRC_SWIZZLE_000X, - AMDIL_SRC_SWIZZLE_Y000, - AMDIL_SRC_SWIZZLE_0Y00, - AMDIL_SRC_SWIZZLE_00Y0, - AMDIL_SRC_SWIZZLE_000Y, - AMDIL_SRC_SWIZZLE_Z000, - AMDIL_SRC_SWIZZLE_0Z00, - AMDIL_SRC_SWIZZLE_00Z0, - AMDIL_SRC_SWIZZLE_000Z, - AMDIL_SRC_SWIZZLE_W000, - AMDIL_SRC_SWIZZLE_0W00, - AMDIL_SRC_SWIZZLE_00W0, - AMDIL_SRC_SWIZZLE_000W, - AMDIL_SRC_SWIZZLE_XY00, - AMDIL_SRC_SWIZZLE_00XY, - AMDIL_SRC_SWIZZLE_ZW00, - AMDIL_SRC_SWIZZLE_00ZW, - AMDIL_SRC_SWIZZLE_XYZ0, - AMDIL_SRC_SWIZZLE_0XYZ, - AMDIL_SRC_SWIZZLE_XYZW, - AMDIL_SRC_SWIZZLE_0000, - AMDIL_SRC_SWIZZLE_XXXX, - AMDIL_SRC_SWIZZLE_YYYY, - AMDIL_SRC_SWIZZLE_ZZZZ, - AMDIL_SRC_SWIZZLE_WWWW, - AMDIL_SRC_SWIZZLE_XYXY, - AMDIL_SRC_SWIZZLE_ZWZW, - AMDIL_SRC_SWIZZLE_XZXZ, - AMDIL_SRC_SWIZZLE_YWYW, - AMDIL_SRC_SWIZZLE_X0Y0, - AMDIL_SRC_SWIZZLE_0X0Y, - AMDIL_SRC_SWIZZLE_XY_NEGY, - AMDIL_SRC_SWIZZLE_NEGYW, - AMDIL_SRC_SWIZZLE_NEGX, - AMDIL_SRC_SWIZZLE_XY_NEGXY, - AMDIL_SRC_SWIZZLE_NEG_XYZW, - AMDIL_SRC_SWIZZLE_0YZW, - AMDIL_SRC_SWIZZLE_X0ZW, - AMDIL_SRC_SWIZZLE_XY0W, - AMDIL_SRC_SWIZZLE_X, - AMDIL_SRC_SWIZZLE_Y, - AMDIL_SRC_SWIZZLE_Z, - AMDIL_SRC_SWIZZLE_W, - AMDIL_SRC_SWIZZLE_XY, - AMDIL_SRC_SWIZZLE_ZW, - AMDIL_SRC_SWIZZLE_LAST -}; -enum DstSwizzles { - AMDIL_DST_SWIZZLE_DEFAULT = 0, - AMDIL_DST_SWIZZLE_X___, - AMDIL_DST_SWIZZLE_XY__, - AMDIL_DST_SWIZZLE_XYZ_, - AMDIL_DST_SWIZZLE_XYZW, - AMDIL_DST_SWIZZLE__Y__, - AMDIL_DST_SWIZZLE__YZ_, - AMDIL_DST_SWIZZLE__YZW, - AMDIL_DST_SWIZZLE___Z_, - AMDIL_DST_SWIZZLE___ZW, - AMDIL_DST_SWIZZLE____W, - AMDIL_DST_SWIZZLE_X_ZW, - AMDIL_DST_SWIZZLE_XY_W, - AMDIL_DST_SWIZZLE_X_Z_, - AMDIL_DST_SWIZZLE_X__W, - AMDIL_DST_SWIZZLE__Y_W, - AMDIL_DST_SWIZZLE_LAST -}; -// Function to get the correct src swizzle string from ID -const char *getSrcSwizzle(unsigned); - -// Function to get the correct dst swizzle string from ID -const char *getDstSwizzle(unsigned); - -const llvm::TargetRegisterClass *getRegClassFromID(unsigned int ID); - -unsigned int getMoveInstFromID(unsigned int ID); -unsigned int getPHIMoveInstFromID(unsigned int ID); - -// Utility functions from Type. -const llvm::TargetRegisterClass *getRegClassFromType(unsigned int type); -unsigned int getTargetIndependentMoveFromType(unsigned int type); - -// Debug functions for SDNode and SDValue. -void printSDValue(const llvm::SDValue &Op, int level); -void printSDNode(const llvm::SDNode *N); - -// Functions to check if an opcode is a specific type. -bool isMove(unsigned int opcode); -bool isPHIMove(unsigned int opcode); -bool isMoveOrEquivalent(unsigned int opcode); - -// Function to check address space -bool check_type(const llvm::Value *ptr, unsigned int addrspace); - -// Group of functions that recursively calculate the size of a structure based -// on it's sub-types. -size_t getTypeSize(llvm::Type * const T, bool dereferencePtr = false); -size_t -getTypeSize(llvm::StructType * const ST, bool dereferencePtr = false); -size_t -getTypeSize(llvm::IntegerType * const IT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::FunctionType * const FT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::ArrayType * const AT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::VectorType * const VT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::PointerType * const PT, bool dereferencePtr = false); -size_t -getTypeSize(llvm::OpaqueType * const OT, bool dereferencePtr = false); - -// Group of functions that recursively calculate the number of elements of a -// structure based on it's sub-types. -size_t getNumElements(llvm::Type * const T); -size_t getNumElements(llvm::StructType * const ST); -size_t getNumElements(llvm::IntegerType * const IT); -size_t getNumElements(llvm::FunctionType * const FT); -size_t getNumElements(llvm::ArrayType * const AT); -size_t getNumElements(llvm::VectorType * const VT); -size_t getNumElements(llvm::PointerType * const PT); -size_t getNumElements(llvm::OpaqueType * const OT); -const llvm::Value *getBasePointerValue(const llvm::Value *V); -const llvm::Value *getBasePointerValue(const llvm::MachineInstr *MI); - - -int64_t GET_SCALAR_SIZE(llvm::Type* A); - -// Helper functions that check the opcode for status information -bool isLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isSWSExtLoadInst(llvm::MachineInstr *MI); -bool isSExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isZExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isAExtLoadInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isTruncStoreInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isAtomicInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isVolatileInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isGlobalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isPrivateInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isConstantInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isRegionInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isLocalInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isImageInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isAppendInst(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isRegionAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isLocalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isGlobalAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); -bool isArenaAtomic(const llvm::TargetInstrInfo * TII, llvm::MachineInstr *MI); - - // Macros that are used to help with switch statements for various data types // However, these macro's do not return anything unlike the second set below. #define ExpandCaseTo32bitIntTypes(Instr) \ @@ -354,9 +175,4 @@ case Instr##_v4f32: \ case Instr##_v2i64: \ case Instr##_v2f64: -bool commaPrint(int i, llvm::raw_ostream &O); -/// Helper function to get the currently get/set flags. -void getAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes); -void setAsmPrinterFlags(llvm::MachineInstr *MI, llvm::AMDILAS::InstrResEnc &curRes); - #endif // AMDILUTILITYFUNCTIONS_H_ diff --git a/src/gallium/drivers/radeon/AMDILVersion.td b/src/gallium/drivers/radeon/AMDILVersion.td index b8b02608d3b..d863b068131 100644 --- a/src/gallium/drivers/radeon/AMDILVersion.td +++ b/src/gallium/drivers/radeon/AMDILVersion.td @@ -1,4 +1,4 @@ -//===-- AMDILVersion.td - TODO: Add brief description -------===// +//===-- AMDILVersion.td - Barrier Instruction/Intrinsic definitions------===// // // The LLVM Compiler Infrastructure // diff --git a/src/gallium/drivers/radeon/Makefile b/src/gallium/drivers/radeon/Makefile index 807dc781c7c..cc409645a6e 100644 --- a/src/gallium/drivers/radeon/Makefile +++ b/src/gallium/drivers/radeon/Makefile @@ -18,6 +18,8 @@ CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS)) tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3 +HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td) + gen: $(GENERATED_SOURCES) SIRegisterInfo.td: SIGenRegisterInfo.pl @@ -26,9 +28,13 @@ SIRegisterInfo.td: SIGenRegisterInfo.pl SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl $(PERL) $^ $@ > /dev/null -R600ShaderPatterns.td: AMDGPUGenShaderPatterns.pl - $(PERL) $^ C > $@ - +R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td +ifeq ($(HAVE_LLVM_INTRINSICS),) + cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td +else + cp R600IntrinsicsOpenCL.td R600Intrinsics.td +endif + R600RegisterInfo.td: R600GenRegisterInfo.pl $(PERL) $^ > $@ diff --git a/src/gallium/drivers/radeon/Makefile.sources b/src/gallium/drivers/radeon/Makefile.sources index 7d2932b4dbd..6dc62320f40 100644 --- a/src/gallium/drivers/radeon/Makefile.sources +++ b/src/gallium/drivers/radeon/Makefile.sources @@ -1,6 +1,6 @@ GENERATED_SOURCES := \ - R600ShaderPatterns.td \ + R600Intrinsics.td \ R600RegisterInfo.td \ AMDGPUInstrEnums.td \ SIRegisterInfo.td \ @@ -29,20 +29,16 @@ CPP_SOURCES := \ AMDILISelDAGToDAG.cpp \ AMDILISelLowering.cpp \ AMDILMachinePeephole.cpp \ - AMDILMCCodeEmitter.cpp \ AMDILNIDevice.cpp \ AMDILPeepholeOptimizer.cpp \ AMDILRegisterInfo.cpp \ AMDILSIDevice.cpp \ AMDILSubtarget.cpp \ AMDILTargetMachine.cpp \ - AMDILUtilityFunctions.cpp \ AMDGPUTargetMachine.cpp \ AMDGPUISelLowering.cpp \ AMDGPUConvertToISA.cpp \ AMDGPULowerInstructions.cpp \ - AMDGPULowerShaderInstructions.cpp \ - AMDGPUReorderPreloadInstructions.cpp \ AMDGPUInstrInfo.cpp \ AMDGPURegisterInfo.cpp \ AMDGPUUtil.cpp \ @@ -51,13 +47,12 @@ CPP_SOURCES := \ R600InstrInfo.cpp \ R600KernelParameters.cpp \ R600LowerInstructions.cpp \ - R600LowerShaderInstructions.cpp \ + R600MachineFunctionInfo.cpp \ R600RegisterInfo.cpp \ SIAssignInterpRegs.cpp \ SICodeEmitter.cpp \ SIInstrInfo.cpp \ SIISelLowering.cpp \ - SILowerShaderInstructions.cpp \ SIMachineFunctionInfo.cpp \ SIPropagateImmReads.cpp \ SIRegisterInfo.cpp \ diff --git a/src/gallium/drivers/radeon/R600CodeEmitter.cpp b/src/gallium/drivers/radeon/R600CodeEmitter.cpp index 8faf0deb8c5..421562255f6 100644 --- a/src/gallium/drivers/radeon/R600CodeEmitter.cpp +++ b/src/gallium/drivers/radeon/R600CodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- R600CodeEmitter.cpp - TODO: Add brief description -------===// +//===-- R600CodeEmitter.cpp - Code Emitter for R600->Cayman GPU families --===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This code emitters outputs bytecode that is understood by the r600g driver +// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA, +// except that the size of the instruction fields are rounded up to the +// nearest byte. +// +// [1] http://www.mesa3d.org/ // //===----------------------------------------------------------------------===// @@ -44,8 +49,9 @@ namespace { const R600RegisterInfo * TRI; bool evergreenEncoding; + bool isCube; bool isReduction; - unsigned reductionElement; + unsigned currentElement; bool isLast; unsigned section_start; @@ -53,7 +59,7 @@ namespace { public: R600CodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID), - _OS(OS), TM(NULL), evergreenEncoding(false), isReduction(false), + _OS(OS), TM(NULL), evergreenEncoding(false), isCube(false), isReduction(false), isLast(true) { } const char *getPassName() const { return "AMDGPU Machine Code Emitter"; } @@ -65,7 +71,7 @@ namespace { private: void emitALUInstr(MachineInstr &MI); - void emitSrc(const MachineOperand & MO); + void emitSrc(const MachineOperand & MO, int chan_override = -1); void emitDst(const MachineOperand & MO); void emitALU(MachineInstr &MI, unsigned numSrc); void emitTexInstr(MachineInstr &MI); @@ -155,10 +161,8 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { } else { evergreenEncoding = true; } - const AMDGPUTargetMachine *amdtm = - static_cast<const AMDGPUTargetMachine *>(&MF.getTarget()); - if (amdtm->shouldDumpCode()) { + if (STM.dumpCode()) { MF.dump(); } @@ -171,18 +175,26 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { if (MI.getNumOperands() > 1 && MI.getOperand(0).isReg() && MI.getOperand(0).isDead()) { continue; } - if (isTexOp(MI.getOpcode())) { + if (AMDGPU::isTexOp(MI.getOpcode())) { emitTexInstr(MI); - } else if (isFCOp(MI.getOpcode())){ + } else if (AMDGPU::isFCOp(MI.getOpcode())){ emitFCInstr(MI); - } else if (isReductionOp(MI.getOpcode())) { + } else if (AMDGPU::isReductionOp(MI.getOpcode())) { isReduction = true; isLast = false; - for (reductionElement = 0; reductionElement < 4; reductionElement++) { - isLast = (reductionElement == 3); + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); emitALUInstr(MI); } isReduction = false; + } else if (AMDGPU::isCubeOp(MI.getOpcode())) { + isCube = true; + isLast = false; + for (currentElement = 0; currentElement < 4; currentElement++) { + isLast = (currentElement == 3); + emitALUInstr(MI); + } + isCube = false; } else if (MI.getOpcode() == AMDIL::RETURN || MI.getOpcode() == AMDIL::BUNDLE || MI.getOpcode() == AMDIL::KILL) { @@ -191,12 +203,7 @@ bool R600CodeEmitter::runOnMachineFunction(MachineFunction &MF) { switch(MI.getOpcode()) { case AMDIL::RAT_WRITE_CACHELESS_eg: { - /* XXX: Support for autoencoding 64-bit instructions was added - * in LLVM 3.1. Until we drop support for 3.0, we will use Magic - * numbers for the high bits. */ - uint64_t high = 0x95c0100000000000; uint64_t inst = getBinaryCodeForInstr(MI); - inst |= high; /* Set End Of Program bit */ /* XXX: Need better check of end of program. EOP should be * encoded in one of the operands of the MI, and it should be @@ -286,7 +293,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) /* Some instructions are just place holder instructions that represent * operations that the GPU does automatically. They should be ignored. */ - if (isPlaceHolderOpcode(MI.getOpcode())) { + if (AMDGPU::isPlaceHolderOpcode(MI.getOpcode())) { return; } @@ -309,18 +316,25 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) /* Emit instruction type */ emitByte(0); - unsigned int opIndex; - for (opIndex = 1; opIndex < numOperands; opIndex++) { - /* Literal constants are always stored as the last operand. */ - if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { - break; + if (isCube) { + static const int cube_src_swz[] = {2, 2, 0, 1}; + emitSrc(MI.getOperand(1), cube_src_swz[currentElement]); + emitSrc(MI.getOperand(1), cube_src_swz[3-currentElement]); + emitNullBytes(SRC_BYTE_COUNT); + } else { + unsigned int opIndex; + for (opIndex = 1; opIndex < numOperands; opIndex++) { + /* Literal constants are always stored as the last operand. */ + if (MI.getOperand(opIndex).isImm() || MI.getOperand(opIndex).isFPImm()) { + break; + } + emitSrc(MI.getOperand(opIndex)); } - emitSrc(MI.getOperand(opIndex)); - } /* Emit zeros for unused sources */ - for ( ; opIndex < 4; opIndex++) { - emitNullBytes(SRC_BYTE_COUNT); + for ( ; opIndex < 4; opIndex++) { + emitNullBytes(SRC_BYTE_COUNT); + } } emitDst(dstOp); @@ -328,7 +342,7 @@ void R600CodeEmitter::emitALUInstr(MachineInstr &MI) emitALU(MI, numOperands - 1); } -void R600CodeEmitter::emitSrc(const MachineOperand & MO) +void R600CodeEmitter::emitSrc(const MachineOperand & MO, int chan_override /* = -1 */) { uint32_t value = 0; /* Emit the source select (2 bytes). For GPRs, this is the register index. @@ -354,8 +368,10 @@ void R600CodeEmitter::emitSrc(const MachineOperand & MO) } /* Emit the source channel (1 byte) */ - if (isReduction) { - emitByte(reductionElement); + if (chan_override != -1) { + emitByte(chan_override); + } else if (isReduction) { + emitByte(currentElement); } else if (MO.isReg()) { emitByte(TRI->getHWRegChan(MO.getReg())); } else { @@ -397,8 +413,8 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) emitByte(getHWReg(MO.getReg())); /* Emit the element of the destination register (1 byte)*/ - if (isReduction) { - emitByte(reductionElement); + if (isReduction || isCube) { + emitByte(currentElement); } else { emitByte(TRI->getHWRegChan(MO.getReg())); } @@ -411,7 +427,7 @@ void R600CodeEmitter::emitDst(const MachineOperand & MO) } /* Emit writemask (1 byte). */ - if ((isReduction && reductionElement != TRI->getHWRegChan(MO.getReg())) + if ((isReduction && currentElement != TRI->getHWRegChan(MO.getReg())) || MO.getTargetFlags() & MO_FLAG_MASK) { emitByte(0); } else { @@ -570,6 +586,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI) case AMDIL::BREAK_LOGICALZ_f32: instr = FC_BREAK; break; + case AMDIL::BREAK_LOGICALNZ_f32: case AMDIL::BREAK_LOGICALNZ_i32: instr = FC_BREAK_NZ_INT; break; @@ -577,6 +594,7 @@ void R600CodeEmitter::emitFCInstr(MachineInstr &MI) instr = FC_BREAK_Z_INT; break; case AMDIL::CONTINUE_LOGICALNZ_f32: + case AMDIL::CONTINUE_LOGICALNZ_i32: instr = FC_CONTINUE; break; /* XXX: This assumes that all IFs will be if (x != 0). If we add @@ -706,44 +724,5 @@ RegElement maskBitToElement(unsigned int maskBit) } } -unsigned int dstSwizzleToWriteMask(unsigned swizzle) -{ - switch(swizzle) { - default: - case AMDIL_DST_SWIZZLE_DEFAULT: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_X___: - return WRITE_MASK_X; - case AMDIL_DST_SWIZZLE_XY__: - return WRITE_MASK_X | WRITE_MASK_Y; - case AMDIL_DST_SWIZZLE_XYZ_: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE_XYZW: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE__Y__: - return WRITE_MASK_Y; - case AMDIL_DST_SWIZZLE__YZ_: - return WRITE_MASK_Y | WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE__YZW: - return WRITE_MASK_Y | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE___Z_: - return WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE___ZW: - return WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE____W: - return WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_X_ZW: - return WRITE_MASK_X | WRITE_MASK_Z | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_XY_W: - return WRITE_MASK_X | WRITE_MASK_Y | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE_X_Z_: - return WRITE_MASK_X | WRITE_MASK_Z; - case AMDIL_DST_SWIZZLE_X__W: - return WRITE_MASK_X | WRITE_MASK_W; - case AMDIL_DST_SWIZZLE__Y_W: - return WRITE_MASK_Y | WRITE_MASK_W; - } -} - #include "AMDILGenCodeEmitter.inc" diff --git a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl index cbded115766..406f3dfdd39 100644 --- a/src/gallium/drivers/radeon/R600GenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/R600GenRegisterInfo.pl @@ -1,20 +1,23 @@ -#===-- R600GenRegisterInfo.pl - TODO: Add brief description -------===# +#===-- R600GenRegisterInfo.pl - Script for generating register info files --===# # # The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. # -#===----------------------------------------------------------------------===# +#===------------------------------------------------------------------------===# # -# TODO: Add full description +# This perl script prints to stdout .td code to be used as R600RegisterInfo.td +# it also generates a file called R600HwRegInfo.include, which contains helper +# functions for determining the hw encoding of registers. # -#===----------------------------------------------------------------------===# +#===------------------------------------------------------------------------===# use strict; use warnings; -use AMDGPUConstants; +use constant CONST_REG_COUNT => 256; +use constant TEMP_REG_COUNT => 128; my $CREG_MAX = CONST_REG_COUNT - 1; my $TREG_MAX = TEMP_REG_COUNT - 1; @@ -81,7 +84,7 @@ def R600_Reg32 : RegisterClass <"AMDIL", [f32, i32], 32, (add R600_CReg32, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>; -def R600_Reg128 : RegisterClass<"AMDIL", [v4f32], 128, (add +def R600_Reg128 : RegisterClass<"AMDIL", [v4f32, v4i32], 128, (add $t128_string)> { let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)]; @@ -170,3 +173,24 @@ sub print_reg_defs { return @reg_list; } +#Helper functions +sub get_hw_index { + my ($index) = @_; + return int($index / 4); +} + +sub get_chan_str { + my ($index) = @_; + my $chan = $index % 4; + if ($chan == 0 ) { + return 'X'; + } elsif ($chan == 1) { + return 'Y'; + } elsif ($chan == 2) { + return 'Z'; + } elsif ($chan == 3) { + return 'W'; + } else { + die("Unknown chan value: $chan"); + } +} diff --git a/src/gallium/drivers/radeon/R600ISelLowering.cpp b/src/gallium/drivers/radeon/R600ISelLowering.cpp index f92fe2641a5..e85ac31b34c 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.cpp +++ b/src/gallium/drivers/radeon/R600ISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- R600ISelLowering.cpp - TODO: Add brief description -------===// +//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,14 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Most of the DAG lowering is handled in AMDILISelLowering.cpp. This file +// is mostly EmitInstrWithCustomInserter(). // //===----------------------------------------------------------------------===// #include "R600ISelLowering.h" #include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -25,9 +27,13 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : // setSchedulingPreference(Sched::VLIW); addRegisterClass(MVT::v4f32, &AMDIL::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDIL::R600_Reg32RegClass); + addRegisterClass(MVT::v4i32, &AMDIL::R600_Reg128RegClass); + addRegisterClass(MVT::i32, &AMDIL::R600_Reg32RegClass); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal); } MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( @@ -35,10 +41,10 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( { MachineFunction * MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineBasicBlock::iterator I = *MI; switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); - /* XXX: Use helper function from AMDGPULowerShaderInstructions here */ case AMDIL::TGID_X: addLiveIn(MI, MF, MRI, TII, AMDIL::T1_X); break; @@ -84,7 +90,49 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDIL::LOCAL_SIZE_Z: lowerImplicitParameter(MI, *BB, MRI, 8); break; + + case AMDIL::R600_LOAD_CONST: + { + int64_t RegIndex = MI->getOperand(1).getImm(); + unsigned ConstantReg = AMDIL::R600_CReg32RegClass.getRegister(RegIndex); + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY)) + .addOperand(MI->getOperand(0)) + .addReg(ConstantReg); + break; + } + + case AMDIL::LOAD_INPUT: + { + int64_t RegIndex = MI->getOperand(1).getImm(); + addLiveIn(MI, MF, MRI, TII, + AMDIL::R600_TReg32RegClass.getRegister(RegIndex)); + break; + } + case AMDIL::STORE_OUTPUT: + { + int64_t OutputIndex = MI->getOperand(1).getImm(); + unsigned OutputReg = AMDIL::R600_TReg32RegClass.getRegister(OutputIndex); + + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDIL::COPY), OutputReg) + .addOperand(MI->getOperand(0)); + + if (!MRI.isLiveOut(OutputReg)) { + MRI.addLiveOut(OutputReg); + } + break; + } + + case AMDIL::RESERVE_REG: + { + R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>(); + int64_t ReservedIndex = MI->getOperand(0).getImm(); + unsigned ReservedReg = + AMDIL::R600_TReg32RegClass.getRegister(ReservedIndex); + MFI->ReservedRegs.push_back(ReservedReg); + break; + } } + MI->eraseFromParent(); return BB; } diff --git a/src/gallium/drivers/radeon/R600ISelLowering.h b/src/gallium/drivers/radeon/R600ISelLowering.h index fd26bf538c4..fdd552a172d 100644 --- a/src/gallium/drivers/radeon/R600ISelLowering.h +++ b/src/gallium/drivers/radeon/R600ISelLowering.h @@ -1,4 +1,4 @@ -//===-- R600ISelLowering.h - TODO: Add brief description -------===// +//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 DAG Lowering interface definition // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/R600InstrInfo.cpp b/src/gallium/drivers/radeon/R600InstrInfo.cpp index 0c7ffc4334d..2bd59fd5e1b 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.cpp +++ b/src/gallium/drivers/radeon/R600InstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- R600InstrInfo.cpp - TODO: Add brief description -------===// +//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 Implementation of TargetInstrInfo. // //===----------------------------------------------------------------------===// @@ -73,10 +73,22 @@ unsigned R600InstrInfo::getISAOpcode(unsigned opcode) const case AMDIL::MOVE_i32: return AMDIL::MOV; case AMDIL::SHR_i32: + return getASHRop(); + case AMDIL::USHR_i32: return getLSHRop(); } } +unsigned R600InstrInfo::getASHRop() const +{ + unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); + if (gen < AMDILDeviceInfo::HD5XXX) { + return AMDIL::ASHR_r600; + } else { + return AMDIL::ASHR_eg; + } +} + unsigned R600InstrInfo::getLSHRop() const { unsigned gen = TM.getSubtarget<AMDILSubtarget>().device()->getGeneration(); diff --git a/src/gallium/drivers/radeon/R600InstrInfo.h b/src/gallium/drivers/radeon/R600InstrInfo.h index aedaa9f47f3..014eeb0b9f7 100644 --- a/src/gallium/drivers/radeon/R600InstrInfo.h +++ b/src/gallium/drivers/radeon/R600InstrInfo.h @@ -1,4 +1,4 @@ -//===-- R600InstrInfo.h - TODO: Add brief description -------===// +//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for R600InstrInfo // //===----------------------------------------------------------------------===// @@ -52,6 +52,7 @@ namespace llvm { bool isTrig(const MachineInstr &MI) const; unsigned getLSHRop() const; + unsigned getASHRop() const; unsigned getMULHI_UINT() const; unsigned getMULLO_UINT() const; unsigned getRECIP_UINT() const; diff --git a/src/gallium/drivers/radeon/R600Instructions.td b/src/gallium/drivers/radeon/R600Instructions.td index 02043fdeea5..a18240f09bd 100644 --- a/src/gallium/drivers/radeon/R600Instructions.td +++ b/src/gallium/drivers/radeon/R600Instructions.td @@ -1,4 +1,4 @@ -//===-- R600Instructions.td - TODO: Add brief description -------===// +//===-- R600Instructions.td - R600 Instruction defs -------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 Tablegen instruction definitions // //===----------------------------------------------------------------------===// @@ -84,7 +84,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern, InstR600 <inst, (outs R600_Reg32:$dst), (ins R600_Reg32:$src0, R600_Reg32:$src1, R600_Reg32:$src2, variable_ops), - !strconcat(opName, "$dst $src0, $src1, $src2"), + !strconcat(opName, " $dst, $src0, $src1, $src2"), pattern, itin>{ @@ -92,7 +92,7 @@ class R600_3OP <bits<32> inst, string opName, list<dag> pattern, } class R600_REDUCTION <bits<32> inst, dag ins, string asm, list<dag> pattern, - InstrItinClass itin = AnyALU> : + InstrItinClass itin = VecALU> : InstR600 <inst, (outs R600_Reg32:$dst), ins, @@ -152,8 +152,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins, let Inst{31-30} = ELEM_SIZE; /* CF_ALLOC_EXPORT_WORD1_BUF */ -/* XXX: We can't have auto encoding of 64-bit instructions until LLVM 3.1 :( */ -/* let Inst{43-32} = ARRAY_SIZE; let Inst{47-44} = COMP_MASK; let Inst{51-48} = BURST_COUNT; @@ -162,7 +160,6 @@ class EG_CF_RAT <bits <8> cf_inst, bits <6> rat_inst, dag outs, dag ins, let Inst{61-54} = cf_inst; let Inst{62} = MARK; let Inst{63} = BARRIER; -*/ } /* @@ -311,6 +308,18 @@ def TRUNC : R600_1OP < [(set R600_Reg32:$dst, (int_AMDGPU_trunc R600_Reg32:$src))] >; +def CEIL : R600_1OP < + 0x12, "CEIL", + [(set R600_Reg32:$dst, (int_AMDIL_round_neginf R600_Reg32:$src))]> { + let AMDILOp = AMDILInst.ROUND_NEGINF_f32; +} + +def RNDNE : R600_1OP < + 0x13, "RNDNE", + [(set R600_Reg32:$dst, (int_AMDIL_round_nearest R600_Reg32:$src))]> { + let AMDILOp = AMDILInst.ROUND_NEAREST_f32; +} + def FLOOR : R600_1OP < 0x14, "FLOOR", [(set R600_Reg32:$dst, (int_AMDGPU_floor R600_Reg32:$src))] @@ -329,64 +338,114 @@ def AND_INT : R600_2OP < let AMDILOp = AMDILInst.AND_i32; } +def OR_INT : R600_2OP < + 0x31, "OR_INT", + []>{ + let AMDILOp = AMDILInst.BINARY_OR_i32; +} + def XOR_INT : R600_2OP < 0x32, "XOR_INT", [] >; +def NOT_INT : R600_1OP < + 0x33, "NOT_INT", + []>{ + let AMDILOp = AMDILInst.BINARY_NOT_i32; +} + def ADD_INT : R600_2OP < - 0x34, "ADD_INT $dst, $src0, $src1", + 0x34, "ADD_INT", []>{ let AMDILOp = AMDILInst.ADD_i32; } def SUB_INT : R600_2OP < - 0x35, "SUB_INT $dst, $src0, $src1", + 0x35, "SUB_INT", [] >; +def MAX_INT : R600_2OP < + 0x36, "MAX_INT", + [(set R600_Reg32:$dst, (int_AMDGPU_imax R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MIN_INT : R600_2OP < + 0x37, "MIN_INT", + [(set R600_Reg32:$dst, (int_AMDGPU_imin R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MAX_UINT : R600_2OP < + 0x38, "MAX_UINT", + [(set R600_Reg32:$dst, (int_AMDGPU_umax R600_Reg32:$src0, R600_Reg32:$src1))]>; + +def MIN_UINT : R600_2OP < + 0x39, "MIN_UINT", + [(set R600_Reg32:$dst, (int_AMDGPU_umin R600_Reg32:$src0, R600_Reg32:$src1))]>; + + def SETE_INT : R600_2OP < - 0x3A, "SETE_INT $dst, $src0, $src1", + 0x3A, "SETE_INT", []>{ let AMDILOp = AMDILInst.IEQ; } def SETGT_INT : R600_2OP < - 0x3B, "SGT_INT $dst, $src0, $src1", + 0x3B, "SGT_INT", [] >; def SETGE_INT : R600_2OP < - 0x3C, "SETGE_INT $dst, $src0, $src1", + 0x3C, "SETGE_INT", []>{ let AMDILOp = AMDILInst.IGE; } def SETNE_INT : R600_2OP < - 0x3D, "SETNE_INT $dst, $src0, $src1", + 0x3D, "SETNE_INT", []>{ let AMDILOp = AMDILInst.INE; } def SETGT_UINT : R600_2OP < - 0x3E, "SETGT_UINT $dst, $src0, $src1", + 0x3E, "SETGT_UINT", []>{ let AMDILOp = AMDILInst.UGT; } def SETGE_UINT : R600_2OP < - 0x3F, "SETGE_UINT $dst, $src0, $src1", + 0x3F, "SETGE_UINT", []>{ let AMDILOp = AMDILInst.UGE; } def CNDE_INT : R600_3OP < - 0x1C, "CNDE_INT $dst, $src0, $src1, $src2", + 0x1C, "CNDE_INT", [] >; /* Texture instructions */ + +def TEX_LD : R600_TEX < + 0x03, "TEX_LD", + [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_TEXTURE_RESINFO : R600_TEX < + 0x04, "TEX_GET_TEXTURE_RESINFO", + [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_GRADIENTS_H : R600_TEX < + 0x07, "TEX_GET_GRADIENTS_H", + [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + +def TEX_GET_GRADIENTS_V : R600_TEX < + 0x08, "TEX_GET_GRADIENTS_V", + [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$src1, imm:$src2))] +>; + def TEX_SAMPLE : R600_TEX < 0x10, "TEX_SAMPLE", [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$src1, imm:$src2))] @@ -434,6 +493,11 @@ def KILP : Pat < (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) >; +def KIL : Pat < + (int_AMDGPU_kill R600_Reg32:$src0), + (MASK_WRITE (KILLGT (f32 ZERO), (f32 R600_Reg32:$src0))) +>; + /* Helper classes for common instructions */ class MUL_LIT_Common <bits<32> inst> : R600_3OP < @@ -470,6 +534,15 @@ class DOT4_Common <bits<32> inst> : R600_REDUCTION < [(set R600_Reg32:$dst, (int_AMDGPU_dp4 R600_Reg128:$src0, R600_Reg128:$src1))] >; +class CUBE_Common <bits<32> inst> : InstR600 < + inst, + (outs R600_Reg128:$dst), + (ins R600_Reg128:$src), + "CUBE $dst $src", + [(set R600_Reg128:$dst, (int_AMDGPU_cube R600_Reg128:$src))], + VecALU +>; + class EXP_IEEE_Common <bits<32> inst> : R600_1OP < inst, "EXP_IEEE", []> { @@ -509,6 +582,12 @@ class LSHR_Common <bits<32> inst> : R600_2OP < let AMDILOp = AMDILInst.USHR_i32; } +class ASHR_Common <bits<32> inst> : R600_2OP < + inst, "ASHR $dst, $src0, $src1", + [] >{ + let AMDILOp = AMDILInst.SHR_i32; +} + class MULHI_INT_Common <bits<32> inst> : R600_2OP < inst, "MULHI_INT $dst, $src0, $src1", [] >{ @@ -608,6 +687,7 @@ let Gen = AMDGPUGen.R600 in { def CNDGT_r600 : CNDGT_Common<0x19>; def CNDGE_r600 : CNDGE_Common<0x1A>; def DOT4_r600 : DOT4_Common<0x50>; + def CUBE_r600 : CUBE_Common<0x52>; def EXP_IEEE_r600 : EXP_IEEE_Common<0x61>; def LOG_CLAMPED_r600 : LOG_CLAMPED_Common<0x62>; def LOG_IEEE_r600 : LOG_IEEE_Common<0x63>; @@ -619,6 +699,7 @@ let Gen = AMDGPUGen.R600 in { def INT_TO_FLT_r600 : INT_TO_FLT_Common<0x6c>; def SIN_r600 : SIN_Common<0x6E>; def COS_r600 : COS_Common<0x6F>; + def ASHR_r600 : ASHR_Common<0x70>; def LSHR_r600 : LSHR_Common<0x71>; def LSHL_r600 : LSHL_Common<0x72>; def MULLO_INT_r600 : MULLO_INT_Common<0x73>; @@ -661,20 +742,12 @@ def RAT_WRITE_CACHELESS_eg : EG_CF_RAT <0x57, 0x2, (outs), (ins R600_TReg32_X:$rw_gpr, R600_TReg32_X:$index_gpr, i32imm:$rat_id), ""> { -/* - let Inst{3-0} = RAT_ID; - let Inst{21-15} = RW_GPR; - let Inst{29-23} = INDEX_GPR; - /* Propery of the UAV */ - let Inst{31-30} = ELEM_SIZE; -*/ let RIM = 0; /* XXX: Have a separate instruction for non-indexed writes. */ let TYPE = 1; let RW_REL = 0; let ELEM_SIZE = 0; -/* let ARRAY_SIZE = 0; let COMP_MASK = 1; let BURST_COUNT = 0; @@ -682,7 +755,6 @@ def RAT_WRITE_CACHELESS_eg : let EOP = 0; let MARK = 0; let BARRIER = 1; -*/ } def VTX_READ_eg : InstR600ISA < (outs R600_TReg32_X:$dst), @@ -789,6 +861,7 @@ class TRIG_eg <InstR600 trig, Intrinsic intr> : Pat< let Gen = AMDGPUGen.EG_CAYMAN in { def MULADD_eg : MULADD_Common<0x14>; + def ASHR_eg : ASHR_Common<0x15>; def LSHR_eg : LSHR_Common<0x16>; def LSHL_eg : LSHL_Common<0x17>; def CNDE_eg : CNDE_Common<0x19>; @@ -812,6 +885,7 @@ let Gen = AMDGPUGen.EG_CAYMAN in { def RECIP_UINT_eg : RECIP_UINT_Common<0x94>; def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>; def DOT4_eg : DOT4_Common<0xBE>; + def CUBE_eg : CUBE_Common<0xC0>; } // End AMDGPUGen.EG_CAYMAN @@ -905,6 +979,34 @@ def LOCAL_SIZE_Y : R600PreloadInst <"LOCAL_SIZE_Y", def LOCAL_SIZE_Z : R600PreloadInst <"LOCAL_SIZE_Z", int_r600_read_local_size_z>; +def R600_LOAD_CONST : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src0), + "R600_LOAD_CONST $dst, $src0", + [(set R600_Reg32:$dst, (int_AMDGPU_load_const imm:$src0))] +>; + +def LOAD_INPUT : AMDGPUShaderInst < + (outs R600_Reg32:$dst), + (ins i32imm:$src), + "LOAD_INPUT $dst, $src", + [(set R600_Reg32:$dst, (int_R600_load_input imm:$src))] +>; + +def RESERVE_REG : AMDGPUShaderInst < + (outs), + (ins i32imm:$src), + "RESERVE_REG $src", + [(int_AMDGPU_reserve_reg imm:$src)] +>; + +def STORE_OUTPUT: AMDGPUShaderInst < + (outs), + (ins R600_Reg32:$src0, i32imm:$src1), + "STORE_OUTPUT $src0, $src1", + [(int_AMDGPU_store_output R600_Reg32:$src0, imm:$src1)] +>; + } // End usesCustomInserter = 1, isPseudo = 1 } // End isCodeGenOnly = 1 @@ -933,15 +1035,14 @@ def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 5, sel_y>; def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 6, sel_z>; def : Insert_Element <f32, v4f32, R600_Reg32, R600_Reg128, 7, sel_w>; +def : Extract_Element <i32, v4i32, R600_Reg128, 0, sel_x>; +def : Extract_Element <i32, v4i32, R600_Reg128, 1, sel_y>; +def : Extract_Element <i32, v4i32, R600_Reg128, 2, sel_z>; +def : Extract_Element <i32, v4i32, R600_Reg128, 3, sel_w>; -include "R600ShaderPatterns.td" - -// We need this pattern to avoid having real registers in PHI nodes. -// For some reason this pattern only works when it comes after the other -// instruction defs. -def : Pat < - (int_R600_load_input imm:$src), - (LOAD_INPUT imm:$src) ->; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 4, sel_x>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 5, sel_y>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 6, sel_z>; +def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 7, sel_w>; } // End isR600toCayman Predicate diff --git a/src/gallium/drivers/radeon/R600Intrinsics.td b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td index 8038fee1a3c..73ef4aae234 100644 --- a/src/gallium/drivers/radeon/R600Intrinsics.td +++ b/src/gallium/drivers/radeon/R600IntrinsicsNoOpenCL.td @@ -1,4 +1,4 @@ -//===-- R600Intrinsics.td - TODO: Add brief description -------===// +//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 Intrinsic Definitions // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/R600InstrFormats.td b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td index 0890eb64509..cd761358475 100644 --- a/src/gallium/drivers/radeon/R600InstrFormats.td +++ b/src/gallium/drivers/radeon/R600IntrinsicsOpenCL.td @@ -1,4 +1,4 @@ -//===-- R600InstrFormats.td - TODO: Add brief description -------===// +//===-- R600Intrinsics.td - TODO: Add brief description -------===// // // The LLVM Compiler Infrastructure // @@ -11,6 +11,6 @@ // //===----------------------------------------------------------------------===// - -class ALUInst <bits<10> op, dag outs, dag ins, string asm, list<dag> pattern> - : InstR600 <, outs, ins , asm, pattern> +let TargetPrefix = "R600", isTarget = 1 in { + def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadWriteArgMem]>; +} diff --git a/src/gallium/drivers/radeon/R600KernelParameters.cpp b/src/gallium/drivers/radeon/R600KernelParameters.cpp index 3fdf48a2bf2..53bfebc7364 100644 --- a/src/gallium/drivers/radeon/R600KernelParameters.cpp +++ b/src/gallium/drivers/radeon/R600KernelParameters.cpp @@ -1,4 +1,4 @@ -//===-- R600KernelParameters.cpp - TODO: Add brief description -------===// +//===-- R600KernelParameters.cpp - Lower kernel function arguments --------===// // // The LLVM Compiler Infrastructure // @@ -7,89 +7,83 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass lowers kernel function arguments to loads from the vertex buffer. +// +// Kernel arguemnts are stored in the vertex buffer at an offset of 9 dwords, +// so arg0 needs to be loaded from VTX_BUFFER[9] and arg1 is loaded from +// VTX_BUFFER[10], etc. // //===----------------------------------------------------------------------===// -#include <llvm-c/Core.h> -#include "R600KernelParameters.h" -#include "R600OpenCLUtils.h" +#include "AMDGPU.h" +#include "AMDIL.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Intrinsics.h" +#include "llvm/Metadata.h" +#include "llvm/Module.h" +#include "llvm/Target/TargetData.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/TypeBuilder.h" -// #include "llvm/CodeGen/Function.h" - -namespace AMDILAS { -enum AddressSpaces { - PRIVATE_ADDRESS = 0, // Address space for private memory. - GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, // Address space for constant memory. - LOCAL_ADDRESS = 3, // Address space for local memory. - REGION_ADDRESS = 4, // Address space for region memory. - ADDRESS_NONE = 5, // Address space for unknown memory. - PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0) - PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1) - LAST_ADDRESS = 8 -}; -} - #include <map> #include <set> using namespace llvm; -using namespace std; + +namespace { #define CONSTANT_CACHE_SIZE_DW 127 -class R600KernelParameters : public llvm::FunctionPass +class R600KernelParameters : public FunctionPass { - const llvm::TargetData * TD; + const TargetData * TD; LLVMContext* Context; Module *mod; - + struct param { - param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), indirect(false), specialID(0) {} - - llvm::Value* val; - llvm::Value* ptr_val; + param() : val(NULL), ptr_val(NULL), offset_in_dw(0), size_in_dw(0), + indirect(false), specialID(0) {} + + Value* val; + Value* ptr_val; int offset_in_dw; int size_in_dw; bool indirect; - - string specialType; + + std::string specialType; int specialID; - + int end() { return offset_in_dw + size_in_dw; } - /* The first 9 dwords are reserved for the grid sizes. */ + // The first 9 dwords are reserved for the grid sizes. int get_rat_offset() { return 9 + offset_in_dw; } }; std::vector<param> params; - int getLastSpecialID(const string& TypeName); - + bool isOpenCLKernel(const Function* fun); + int getLastSpecialID(const std::string& TypeName); + int getListSize(); - void AddParam(llvm::Argument* arg); - int calculateArgumentSize(llvm::Argument* arg); - void RunAna(llvm::Function* fun); - void Replace(llvm::Function* fun); - bool isIndirect(Value* val, set<Value*>& visited); - void Propagate(llvm::Function* fun); - void Propagate(llvm::Value* v, const llvm::Twine& name, bool indirect = false); + void AddParam(Argument* arg); + int calculateArgumentSize(Argument* arg); + void RunAna(Function* fun); + void Replace(Function* fun); + bool isIndirect(Value* val, std::set<Value*>& visited); + void Propagate(Function* fun); + void Propagate(Value* v, const Twine& name, bool indirect = false); Value* ConstantRead(Function* fun, param& p); Value* handleSpecial(Function* fun, param& p); bool isSpecialType(Type*); - string getSpecialTypeName(Type*); + std::string getSpecialTypeName(Type*); public: static char ID; R600KernelParameters() : FunctionPass(ID) {}; - R600KernelParameters(const llvm::TargetData* TD) : FunctionPass(ID), TD(TD) {} -// bool runOnFunction (llvm::Function &F); - bool runOnFunction (llvm::Function &F); + R600KernelParameters(const TargetData* TD) : FunctionPass(ID), TD(TD) {} + bool runOnFunction (Function &F); void getAnalysisUsage(AnalysisUsage &AU) const; const char *getPassName() const; bool doInitialization(Module &M); @@ -98,13 +92,42 @@ public: char R600KernelParameters::ID = 0; -static RegisterPass<R600KernelParameters> X("kerparam", "OpenCL Kernel Parameter conversion", false, false); +static RegisterPass<R600KernelParameters> X("kerparam", + "OpenCL Kernel Parameter conversion", false, false); -int R600KernelParameters::getLastSpecialID(const string& TypeName) +bool R600KernelParameters::isOpenCLKernel(const Function* fun) +{ + Module *mod = const_cast<Function*>(fun)->getParent(); + NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels"); + + if (!md or !md->getNumOperands()) + { + return false; + } + + for (int i = 0; i < int(md->getNumOperands()); i++) + { + if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0)) + { + continue; + } + + assert(md->getOperand(i)->getNumOperands() == 1); + + if (md->getOperand(i)->getOperand(0)->getName() == fun->getName()) + { + return true; + } + } + + return false; +} + +int R600KernelParameters::getLastSpecialID(const std::string& TypeName) { int lastID = -1; - - for (vector<param>::iterator i = params.begin(); i != params.end(); i++) + + for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++) { if (i->specialType == TypeName) { @@ -125,7 +148,7 @@ int R600KernelParameters::getListSize() return params.back().end(); } -bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) +bool R600KernelParameters::isIndirect(Value* val, std::set<Value*>& visited) { if (isa<LoadInst>(val)) { @@ -144,7 +167,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) } visited.insert(val); - + if (isa<GetElementPtrInst>(val)) { GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(val); @@ -158,7 +181,7 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) } } } - + for (Value::use_iterator i = val->use_begin(); i != val->use_end(); i++) { Value* v2 = dyn_cast<Value>(*i); @@ -175,24 +198,24 @@ bool R600KernelParameters::isIndirect(Value* val, set<Value*>& visited) return false; } -void R600KernelParameters::AddParam(llvm::Argument* arg) +void R600KernelParameters::AddParam(Argument* arg) { param p; - + p.val = dyn_cast<Value>(arg); p.offset_in_dw = getListSize(); p.size_in_dw = calculateArgumentSize(arg); if (isa<PointerType>(arg->getType()) and arg->hasByValAttr()) { - set<Value*> visited; + std::set<Value*> visited; p.indirect = isIndirect(p.val, visited); } - + params.push_back(p); } -int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg) +int R600KernelParameters::calculateArgumentSize(Argument* arg) { Type* t = arg->getType(); @@ -200,16 +223,16 @@ int R600KernelParameters::calculateArgumentSize(llvm::Argument* arg) { t = dyn_cast<PointerType>(t)->getElementType(); } - + int store_size_in_dw = (TD->getTypeStoreSize(t) + 3)/4; assert(store_size_in_dw); - + return store_size_in_dw; } -void R600KernelParameters::RunAna(llvm::Function* fun) +void R600KernelParameters::RunAna(Function* fun) { assert(isOpenCLKernel(fun)); @@ -220,7 +243,7 @@ void R600KernelParameters::RunAna(llvm::Function* fun) } -void R600KernelParameters::Replace(llvm::Function* fun) +void R600KernelParameters::Replace(Function* fun) { for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++) { @@ -237,11 +260,11 @@ void R600KernelParameters::Replace(llvm::Function* fun) if (new_val) { i->val->replaceAllUsesWith(new_val); - } + } } } -void R600KernelParameters::Propagate(llvm::Function* fun) +void R600KernelParameters::Propagate(Function* fun) { for (std::vector<param>::iterator i = params.begin(); i != params.end(); i++) { @@ -256,8 +279,8 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) { LoadInst* load = dyn_cast<LoadInst>(v); GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(v); - - unsigned addrspace; + + unsigned addrspace; if (indirect) { @@ -274,49 +297,54 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) if (dyn_cast<PointerType>(op->getType())->getAddressSpace() != addrspace) { - op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>(op->getType())->getElementType(), addrspace), name, dyn_cast<Instruction>(v)); + op = new BitCastInst(op, PointerType::get(dyn_cast<PointerType>( + op->getType())->getElementType(), addrspace), + name, dyn_cast<Instruction>(v)); } - vector<Value*> params(GEP->idx_begin(), GEP->idx_end()); - - GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, dyn_cast<Instruction>(v)); + std::vector<Value*> params(GEP->idx_begin(), GEP->idx_end()); + + GetElementPtrInst* GEP2 = GetElementPtrInst::Create(op, params, name, + dyn_cast<Instruction>(v)); GEP2->setIsInBounds(GEP->isInBounds()); v = dyn_cast<Value>(GEP2); GEP->replaceAllUsesWith(GEP2); GEP->eraseFromParent(); load = NULL; } - + if (load) { - if (load->getPointerAddressSpace() != addrspace) ///normally at this point we have the right address space + ///normally at this point we have the right address space + if (load->getPointerAddressSpace() != addrspace) { Value *orig_ptr = load->getPointerOperand(); PointerType *orig_ptr_type = dyn_cast<PointerType>(orig_ptr->getType()); - - Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), addrspace); + + Type* new_ptr_type = PointerType::get(orig_ptr_type->getElementType(), + addrspace); Value* new_ptr = orig_ptr; - + if (orig_ptr->getType() != new_ptr_type) { new_ptr = new BitCastInst(orig_ptr, new_ptr_type, "prop_cast", load); } - + Value* new_load = new LoadInst(new_ptr, name, load); load->replaceAllUsesWith(new_load); load->eraseFromParent(); } - + return; } - vector<User*> users(v->use_begin(), v->use_end()); - + std::vector<User*> users(v->use_begin(), v->use_end()); + for (int i = 0; i < int(users.size()); i++) { Value* v2 = dyn_cast<Value>(users[i]); - + if (v2) { Propagate(v2, name, indirect); @@ -327,7 +355,7 @@ void R600KernelParameters::Propagate(Value* v, const Twine& name, bool indirect) Value* R600KernelParameters::ConstantRead(Function* fun, param& p) { assert(fun->front().begin() != fun->front().end()); - + Instruction *first_inst = fun->front().begin(); IRBuilder <> builder (first_inst); /* First 3 dwords are reserved for the dimmension info */ @@ -346,43 +374,54 @@ Value* R600KernelParameters::ConstantRead(Function* fun, param& p) { addrspace = AMDILAS::PARAM_D_ADDRESS; } - + Argument *arg = dyn_cast<Argument>(p.val); Type * argType = p.val->getType(); PointerType * argPtrType = dyn_cast<PointerType>(p.val->getType()); - + if (argPtrType and arg->hasByValAttr()) { - Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(Type::getInt32Ty(*Context), addrspace)); - Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName(), first_inst); - param_ptr = new BitCastInst(param_ptr, PointerType::get(argPtrType->getElementType(), addrspace), arg->getName(), first_inst); + Value* param_addr_space_ptr = ConstantPointerNull::get( + PointerType::get(Type::getInt32Ty(*Context), + addrspace)); + Value* param_ptr = GetElementPtrInst::Create(param_addr_space_ptr, + ConstantInt::get(Type::getInt32Ty(*Context), + p.get_rat_offset()), arg->getName(), + first_inst); + param_ptr = new BitCastInst(param_ptr, + PointerType::get(argPtrType->getElementType(), + addrspace), + arg->getName(), first_inst); p.ptr_val = param_ptr; return param_ptr; } else { - Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get(argType, addrspace)); - + Value* param_addr_space_ptr = ConstantPointerNull::get(PointerType::get( + argType, addrspace)); + Value* param_ptr = builder.CreateGEP(param_addr_space_ptr, - ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), arg->getName()); - + ConstantInt::get(Type::getInt32Ty(*Context), p.get_rat_offset()), + arg->getName()); + Value* param_value = builder.CreateLoad(param_ptr, arg->getName()); - + return param_value; } } Value* R600KernelParameters::handleSpecial(Function* fun, param& p) { - string name = getSpecialTypeName(p.val->getType()); + std::string name = getSpecialTypeName(p.val->getType()); int ID; assert(!name.empty()); - + if (name == "image2d_t" or name == "image3d_t") { - int lastID = max(getLastSpecialID("image2d_t"), getLastSpecialID("image3d_t")); - + int lastID = std::max(getLastSpecialID("image2d_t"), + getLastSpecialID("image3d_t")); + if (lastID == -1) { ID = 2; ///ID0 and ID1 are used internally by the driver @@ -403,20 +442,22 @@ Value* R600KernelParameters::handleSpecial(Function* fun, param& p) else { ID = lastID + 1; - } + } } else { ///TODO: give some error message return NULL; } - + p.specialType = name; p.specialID = ID; Instruction *first_inst = fun->front().begin(); - return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), p.specialID), p.val->getType(), "resourceID", first_inst); + return new IntToPtrInst(ConstantInt::get(Type::getInt32Ty(*Context), + p.specialID), p.val->getType(), + "resourceID", first_inst); } @@ -425,7 +466,7 @@ bool R600KernelParameters::isSpecialType(Type* t) return !getSpecialTypeName(t).empty(); } -string R600KernelParameters::getSpecialTypeName(Type* t) +std::string R600KernelParameters::getSpecialTypeName(Type* t) { PointerType *pt = dyn_cast<PointerType>(t); StructType *st = NULL; @@ -437,9 +478,9 @@ string R600KernelParameters::getSpecialTypeName(Type* t) if (st) { - string prefix = "struct.opencl_builtin_type_"; - - string name = st->getName().str(); + std::string prefix = "struct.opencl_builtin_type_"; + + std::string name = st->getName().str(); if (name.substr(0, prefix.length()) == prefix) { @@ -458,19 +499,15 @@ bool R600KernelParameters::runOnFunction (Function &F) return false; } -// F.dump(); - RunAna(&F); Replace(&F); Propagate(&F); - - mod->dump(); + return false; } void R600KernelParameters::getAnalysisUsage(AnalysisUsage &AU) const { -// AU.addRequired<FunctionAnalysis>(); FunctionPass::getAnalysisUsage(AU); AU.setPreservesAll(); } @@ -484,7 +521,7 @@ bool R600KernelParameters::doInitialization(Module &M) { Context = &M.getContext(); mod = &M; - + return false; } @@ -493,10 +530,12 @@ bool R600KernelParameters::doFinalization(Module &M) return false; } -llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD) +} // End anonymous namespace + +FunctionPass* llvm::createR600KernelParametersPass(const TargetData* TD) { FunctionPass *p = new R600KernelParameters(TD); - + return p; } diff --git a/src/gallium/drivers/radeon/R600KernelParameters.h b/src/gallium/drivers/radeon/R600KernelParameters.h deleted file mode 100644 index 904a469a5f0..00000000000 --- a/src/gallium/drivers/radeon/R600KernelParameters.h +++ /dev/null @@ -1,28 +0,0 @@ -//===-- R600KernelParameters.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#ifndef KERNELPARAMETERS_H -#define KERNELPARAMETERS_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Value.h" - -#include <vector> - -llvm::FunctionPass* createR600KernelParametersPass(const llvm::TargetData* TD); - - -#endif diff --git a/src/gallium/drivers/radeon/R600LowerInstructions.cpp b/src/gallium/drivers/radeon/R600LowerInstructions.cpp index fb5431d0eef..dca1fe195cc 100644 --- a/src/gallium/drivers/radeon/R600LowerInstructions.cpp +++ b/src/gallium/drivers/radeon/R600LowerInstructions.cpp @@ -1,4 +1,4 @@ -//===-- R600LowerInstructions.cpp - TODO: Add brief description -------===// +//===-- R600LowerInstructions.cpp - Lower unsupported AMDIL instructions --===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass lowers AMDIL MachineInstrs that aren't supported by the R600 +// target to either supported AMDIL MachineInstrs or R600 MachineInstrs. // //===----------------------------------------------------------------------===// @@ -93,8 +94,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) &AMDIL::R600_TReg32RegClass); BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGE_INT), setgt) - .addOperand(MI.getOperand(1)) - .addReg(AMDIL::ZERO); + .addReg(AMDIL::ZERO) + .addOperand(MI.getOperand(1)); unsigned add_int = MRI->createVirtualRegister( &AMDIL::R600_TReg32RegClass); @@ -311,7 +312,8 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) MachineInstr * defInstr = MRI->getVRegDef(maskedRegister); MachineOperand * def = defInstr->findRegisterDefOperand(maskedRegister); def->addTargetFlag(MO_FLAG_MASK); - break; + /* Continue so the instruction is not erased */ + continue; } case AMDIL::NEGATE_i32: @@ -342,6 +344,13 @@ bool R600LowerInstructionsPass::runOnMachineFunction(MachineFunction &MF) break; } + case AMDIL::ULT: + BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::SETGT_UINT)) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(1)); + break; + default: continue; } diff --git a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp b/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp deleted file mode 100644 index 394ee7006ce..00000000000 --- a/src/gallium/drivers/radeon/R600LowerShaderInstructions.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//===-- R600LowerShaderInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPULowerShaderInstructions.h" -#include "AMDIL.h" -#include "AMDILInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - class R600LowerShaderInstructionsPass : public MachineFunctionPass, - public AMDGPULowerShaderInstructionsPass { - - private: - static char ID; - TargetMachine &TM; - - void lowerEXPORT_REG_FAKE(MachineInstr &MI, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); - void lowerLOAD_INPUT(MachineInstr & MI); - bool lowerSTORE_OUTPUT(MachineInstr & MI, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); - - public: - R600LowerShaderInstructionsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "R600 Lower Shader Instructions"; } - }; -} /* End anonymous namespace */ - -char R600LowerShaderInstructionsPass::ID = 0; - -FunctionPass *llvm::createR600LowerShaderInstructionsPass(TargetMachine &tm) { - return new R600LowerShaderInstructionsPass(tm); -} - -#define INSTR_CASE_FLOAT_V(inst) \ - case AMDIL:: inst##_v4f32: \ - -#define INSTR_CASE_FLOAT_S(inst) \ - case AMDIL:: inst##_f32: - -#define INSTR_CASE_FLOAT(inst) \ - INSTR_CASE_FLOAT_V(inst) \ - INSTR_CASE_FLOAT_S(inst) -bool R600LowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF) -{ - MRI = &MF.getRegInfo(); - - - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { - MachineInstr &MI = *I; - bool deleteInstr = false; - switch (MI.getOpcode()) { - - default: break; - - case AMDIL::RESERVE_REG: - case AMDIL::EXPORT_REG: - deleteInstr = true; - break; - - case AMDIL::LOAD_INPUT: - lowerLOAD_INPUT(MI); - deleteInstr = true; - break; - - case AMDIL::STORE_OUTPUT: - deleteInstr = lowerSTORE_OUTPUT(MI, MBB, I); - break; - - } - - ++I; - - if (deleteInstr) { - MI.eraseFromParent(); - } - } - } - - return false; -} - -/* The goal of this function is to replace the virutal destination register of - * a LOAD_INPUT instruction with the correct physical register that will. - * - * XXX: I don't think this is the right way things assign physical registers, - * but I'm not sure of another way to do this. - */ -void R600LowerShaderInstructionsPass::lowerLOAD_INPUT(MachineInstr &MI) -{ - MachineOperand &dst = MI.getOperand(0); - MachineOperand &arg = MI.getOperand(1); - int64_t inputIndex = arg.getImm(); - const TargetRegisterClass * inputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID); - unsigned newRegister = inputClass->getRegister(inputIndex); - unsigned dstReg = dst.getReg(); - - preloadRegister(MI.getParent()->getParent(), TM.getInstrInfo(), newRegister, - dstReg); -} - -bool R600LowerShaderInstructionsPass::lowerSTORE_OUTPUT(MachineInstr &MI, - MachineBasicBlock &MBB, MachineBasicBlock::iterator I) -{ - MachineOperand &valueOp = MI.getOperand(1); - MachineOperand &indexOp = MI.getOperand(2); - unsigned valueReg = valueOp.getReg(); - int64_t outputIndex = indexOp.getImm(); - const TargetRegisterClass * outputClass = TM.getRegisterInfo()->getRegClass(AMDIL::R600_TReg32RegClassID); - unsigned newRegister = outputClass->getRegister(outputIndex); - - BuildMI(MBB, I, MBB.findDebugLoc(I), TM.getInstrInfo()->get(AMDIL::COPY), - newRegister) - .addReg(valueReg); - - if (!MRI->isLiveOut(newRegister)) - MRI->addLiveOut(newRegister); - - return true; - -} diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp new file mode 100644 index 00000000000..48443fb57d8 --- /dev/null +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.cpp @@ -0,0 +1,16 @@ +//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "R600MachineFunctionInfo.h" + +using namespace llvm; + +R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) + : MachineFunctionInfo() + { } diff --git a/src/gallium/drivers/radeon/R600MachineFunctionInfo.h b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h new file mode 100644 index 00000000000..948e1924272 --- /dev/null +++ b/src/gallium/drivers/radeon/R600MachineFunctionInfo.h @@ -0,0 +1,33 @@ +//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// R600MachineFunctionInfo is used for keeping track of which registers have +// been reserved by the llvm.AMDGPU.reserve.reg intrinsic. +// +//===----------------------------------------------------------------------===// + +#ifndef R600MACHINEFUNCTIONINFO_H +#define R600MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" +#include <vector> + +namespace llvm { + +class R600MachineFunctionInfo : public MachineFunctionInfo { + +public: + R600MachineFunctionInfo(const MachineFunction &MF); + std::vector<unsigned> ReservedRegs; + +}; + +} // End llvm namespace + +#endif //R600MACHINEFUNCTIONINFO_H diff --git a/src/gallium/drivers/radeon/R600OpenCLUtils.h b/src/gallium/drivers/radeon/R600OpenCLUtils.h deleted file mode 100644 index 91e41d63d0d..00000000000 --- a/src/gallium/drivers/radeon/R600OpenCLUtils.h +++ /dev/null @@ -1,49 +0,0 @@ -//===-- OpenCLUtils.h - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// -#ifndef OPENCLUTILS_H -#define OPENCLUTILS_H - -#include "llvm/Function.h" - -#include <llvm/Module.h> - -static bool isOpenCLKernel(const llvm::Function* fun) -{ - llvm::Module *mod = const_cast<llvm::Function*>(fun)->getParent(); - llvm::NamedMDNode * md = mod->getOrInsertNamedMetadata("opencl.kernels"); - - if (!md or !md->getNumOperands()) - { - return false; - } - - for (int i = 0; i < int(md->getNumOperands()); i++) - { - if (!md->getOperand(i) or !md->getOperand(i)->getOperand(0)) - { - continue; - } - - assert(md->getOperand(i)->getNumOperands() == 1); - - if (md->getOperand(i)->getOperand(0)->getName() == fun->getName()) - { - return true; - } - } - - return false; -} - - -#endif diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.cpp b/src/gallium/drivers/radeon/R600RegisterInfo.cpp index 96507b104cf..de559bd2dfa 100644 --- a/src/gallium/drivers/radeon/R600RegisterInfo.cpp +++ b/src/gallium/drivers/radeon/R600RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- R600RegisterInfo.cpp - TODO: Add brief description -------===// +//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The file contains the R600 implementation of the TargetRegisterInfo class. // //===----------------------------------------------------------------------===// #include "R600RegisterInfo.h" #include "AMDGPUTargetMachine.h" +#include "R600MachineFunctionInfo.h" using namespace llvm; @@ -26,6 +27,8 @@ R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm, BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); + const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>(); + Reserved.set(AMDIL::ZERO); Reserved.set(AMDIL::HALF); Reserved.set(AMDIL::ONE); @@ -40,19 +43,11 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const Reserved.set(*I); } - for (MachineFunction::const_iterator BB = MF.begin(), - BB_E = MF.end(); BB != BB_E; ++BB) { - const MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - const MachineInstr &MI = *I; - if (MI.getOpcode() == AMDIL::RESERVE_REG) { - if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) { - Reserved.set(MI.getOperand(0).getReg()); - } - } - } + for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(), + E = MFI->ReservedRegs.end(); I != E; ++I) { + Reserved.set(*I); } + return Reserved; } diff --git a/src/gallium/drivers/radeon/R600RegisterInfo.h b/src/gallium/drivers/radeon/R600RegisterInfo.h index 95a44f971a0..89a11f9333b 100644 --- a/src/gallium/drivers/radeon/R600RegisterInfo.h +++ b/src/gallium/drivers/radeon/R600RegisterInfo.h @@ -1,4 +1,4 @@ -//===-- R600RegisterInfo.h - TODO: Add brief description -------===// +//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for R600RegisterInfo // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/R600Schedule.td b/src/gallium/drivers/radeon/R600Schedule.td index c6b1ca61bb5..d1957903d87 100644 --- a/src/gallium/drivers/radeon/R600Schedule.td +++ b/src/gallium/drivers/radeon/R600Schedule.td @@ -1,4 +1,4 @@ -//===-- R600Schedule.td - TODO: Add brief description -------===// +//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction +// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS +// slot has been removed. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp index b0bdf701a74..1ef097f7b1e 100644 --- a/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp +++ b/src/gallium/drivers/radeon/SIAssignInterpRegs.cpp @@ -1,4 +1,4 @@ -//===-- SIAssignInterpRegs.cpp - TODO: Add brief description -------===// +//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This pass maps the pseudo interpolation registers to the correct physical +// registers. Prior to executing a fragment shader, the GPU loads interpolation +// parameters into physical registers. The specific physical register that each +// interpolation parameter ends up in depends on the type of the interpolation +// parameter as well as how many interpolation parameters are used by the +// shader. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SICodeEmitter.cpp b/src/gallium/drivers/radeon/SICodeEmitter.cpp index ad494fae7c6..6970d9f0875 100644 --- a/src/gallium/drivers/radeon/SICodeEmitter.cpp +++ b/src/gallium/drivers/radeon/SICodeEmitter.cpp @@ -1,4 +1,4 @@ -//===-- SICodeEmitter.cpp - TODO: Add brief description -------===// +//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// The SI code emitter produces machine code that can be executed directly on +// the GPU device. // //===----------------------------------------------------------------------===// @@ -144,8 +145,6 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF) { MF.dump(); TM = &MF.getTarget(); - const AMDGPUInstrInfo * TII = - static_cast<const AMDGPUInstrInfo*>(TM->getInstrInfo()); emitState(MF); @@ -155,8 +154,7 @@ bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF) for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { MachineInstr &MI = *I; - if (!TII->isRegPreload(MI) && MI.getOpcode() != AMDIL::KILL - && MI.getOpcode() != AMDIL::RETURN) { + if (MI.getOpcode() != AMDIL::KILL && MI.getOpcode() != AMDIL::RETURN) { emitInstr(MI); } } diff --git a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl index 644daa1bc22..bb5ebbd67e6 100644 --- a/src/gallium/drivers/radeon/SIGenRegisterInfo.pl +++ b/src/gallium/drivers/radeon/SIGenRegisterInfo.pl @@ -1,16 +1,17 @@ -#===-- SIGenRegisterInfo.pl - TODO: Add brief description -------===# +#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===# # # The LLVM Compiler Infrastructure # # This file is distributed under the University of Illinois Open Source # License. See LICENSE.TXT for details. # -#===----------------------------------------------------------------------===# +#===------------------------------------------------------------------------===# # -# TODO: Add full description +# This perl script prints to stdout .td code to be used as SIRegisterInfo.td +# it also generates a file called SIHwRegInfo.include, which contains helper +# functions for determining the hw encoding of registers. # -#===----------------------------------------------------------------------===# - +#===------------------------------------------------------------------------===# use strict; use warnings; diff --git a/src/gallium/drivers/radeon/SIISelLowering.cpp b/src/gallium/drivers/radeon/SIISelLowering.cpp index 1a4b47ecbf5..441a4a07290 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.cpp +++ b/src/gallium/drivers/radeon/SIISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- SIISelLowering.cpp - TODO: Add brief description -------===// +//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Most of the DAG lowering is handled in AMDILISelLowering.cpp. This file is +// mostly EmitInstrWithCustomInserter(). // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIISelLowering.h b/src/gallium/drivers/radeon/SIISelLowering.h index e7a79f8e215..229e682ef51 100644 --- a/src/gallium/drivers/radeon/SIISelLowering.h +++ b/src/gallium/drivers/radeon/SIISelLowering.h @@ -1,4 +1,4 @@ -//===-- SIISelLowering.h - TODO: Add brief description -------===// +//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI DAG Lowering interface definition // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIInstrFormats.td b/src/gallium/drivers/radeon/SIInstrFormats.td index caf9b0ef120..de0d4fa39d2 100644 --- a/src/gallium/drivers/radeon/SIInstrFormats.td +++ b/src/gallium/drivers/radeon/SIInstrFormats.td @@ -1,4 +1,4 @@ -//===-- SIInstrFormats.td - TODO: Add brief description -------===// +//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,17 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI Instruction format definitions. +// +// Instructions with _32 take 32-bit operands. +// Instructions with _64 take 64-bit operands. +// +// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit +// encoding is the standard encoding, but instruction that make use of +// any of the instruction modifiers must use the 64-bit encoding. +// +// Instructions with _e32 use the 32-bit encoding. +// Instructions with _e64 use the 64-bit encoding. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIInstrInfo.cpp b/src/gallium/drivers/radeon/SIInstrInfo.cpp index 6f92e96c6e7..0cb97643a7f 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.cpp +++ b/src/gallium/drivers/radeon/SIInstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.cpp - TODO: Add brief description -------===// +//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI Implementation of TargetInstrInfo. // //===----------------------------------------------------------------------===// @@ -107,6 +107,8 @@ unsigned SIInstrInfo::getISAOpcode(unsigned AMDILopcode) const { switch (AMDILopcode) { case AMDIL::MAD_f32: return AMDIL::V_MAD_LEGACY_F32; + //XXX We need a better way of detecting end of program + case AMDIL::RETURN: return AMDIL::S_ENDPGM; default: return AMDGPUInstrInfo::getISAOpcode(AMDILopcode); } } diff --git a/src/gallium/drivers/radeon/SIInstrInfo.h b/src/gallium/drivers/radeon/SIInstrInfo.h index bd76c3f94aa..68940ea3ca4 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.h +++ b/src/gallium/drivers/radeon/SIInstrInfo.h @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.h - TODO: Add brief description -------===// +//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for SIInstrInfo. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIInstrInfo.td b/src/gallium/drivers/radeon/SIInstrInfo.td index 65b28ec84ad..435948ff1de 100644 --- a/src/gallium/drivers/radeon/SIInstrInfo.td +++ b/src/gallium/drivers/radeon/SIInstrInfo.td @@ -1,4 +1,4 @@ -//===-- SIInstrInfo.td - TODO: Add brief description -------===// +//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// @@ -464,11 +460,4 @@ def IMM12bit : ImmLeaf < include "SIInstrFormats.td" -def LOAD_CONST : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "LOAD_CONST $dst, $src", - [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] ->; - include "SIInstructions.td" diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index 27a8b31757a..57bbc7a5d5a 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -1,4 +1,4 @@ -//===-- SIInstructions.td - TODO: Add brief description -------===// +//===-- SIInstructions.td - SI Instruction Defintions ---------------------===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// def isSI : Predicate<"Subtarget.device()" @@ -800,6 +796,13 @@ def CONFIG_WRITE : InstSI < field bits<32> Inst = 0; } +def LOAD_CONST : AMDGPUShaderInst < + (outs GPRF32:$dst), + (ins i32imm:$src), + "LOAD_CONST $dst, $src", + [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] +>; + let usesCustomInserter = 1 in { def SI_V_CNDLT : InstSI < @@ -833,7 +836,6 @@ def USE_SGPR_32 : InstSI < > { field bits<32> Inst = 0; - let PreloadReg = 1; } def USE_SGPR_64 : InstSI < @@ -844,7 +846,6 @@ def USE_SGPR_64 : InstSI < > { field bits<32> Inst = 0; - let PreloadReg = 1; } def VS_LOAD_BUFFER_INDEX : InstSI < @@ -854,7 +855,6 @@ def VS_LOAD_BUFFER_INDEX : InstSI < [(set VReg_32:$dst, (int_SI_vs_load_buffer_index))]> { field bits<32> Inst = 0; - let PreloadReg = 1; } } // end usesCustomInserter diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td index e3014e13916..4d23072d4f1 100644 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ b/src/gallium/drivers/radeon/SIIntrinsics.td @@ -1,4 +1,4 @@ -//===-- SIIntrinsics.td - TODO: Add brief description -------===// +//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SI Intrinsic Definitions // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp b/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp deleted file mode 100644 index 5d49d88dc7c..00000000000 --- a/src/gallium/drivers/radeon/SILowerShaderInstructions.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===-- SILowerShaderInstructions.cpp - TODO: Add brief description -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// - - -#include "AMDGPU.h" -#include "AMDGPULowerShaderInstructions.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -namespace { - class SILowerShaderInstructionsPass : public MachineFunctionPass, - public AMDGPULowerShaderInstructionsPass { - - private: - static char ID; - TargetMachine &TM; - - public: - SILowerShaderInstructionsPass(TargetMachine &tm) : - MachineFunctionPass(ID), TM(tm) { } - - bool runOnMachineFunction(MachineFunction &MF); - - const char *getPassName() const { return "SI Lower Shader Instructions"; } - - void lowerRETURN(MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - void lowerSET_M0(MachineInstr &MI, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I); - }; -} /* End anonymous namespace */ - -char SILowerShaderInstructionsPass::ID = 0; - -FunctionPass *llvm::createSILowerShaderInstructionsPass(TargetMachine &tm) { - return new SILowerShaderInstructionsPass(tm); -} - -bool SILowerShaderInstructionsPass::runOnMachineFunction(MachineFunction &MF) -{ - MRI = &MF.getRegInfo(); - for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); - BB != BB_E; ++BB) { - MachineBasicBlock &MBB = *BB; - for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I); - I != MBB.end(); I = Next, Next = llvm::next(I) ) { - MachineInstr &MI = *I; - switch (MI.getOpcode()) { - case AMDIL::RETURN: - lowerRETURN(MBB, I); - break; - case AMDIL::SET_M0: - lowerSET_M0(MI, MBB, I); - break; - default: continue; - } - MI.removeFromParent(); - } - } - - return false; -} - -void SILowerShaderInstructionsPass::lowerRETURN(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) -{ - const struct TargetInstrInfo * TII = TM.getInstrInfo(); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_ENDPGM)); -} - -void SILowerShaderInstructionsPass::lowerSET_M0(MachineInstr &MI, - MachineBasicBlock &MBB, MachineBasicBlock::iterator I) -{ - const struct TargetInstrInfo * TII = TM.getInstrInfo(); - BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDIL::S_MOV_IMM_I32)) - .addReg(AMDIL::M0) - .addOperand(MI.getOperand(1)); -} diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp index eace40c226c..40ba76f1f86 100644 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp +++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIMachineFunctionInfo.cpp - TODO: Add brief description -------===// +//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===// // // The LLVM Compiler Infrastructure // @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// TODO: Add full description -// -//===----------------------------------------------------------------------===// #include "SIMachineFunctionInfo.h" diff --git a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h index 5647de9d81f..46a021f3613 100644 --- a/src/gallium/drivers/radeon/SIMachineFunctionInfo.h +++ b/src/gallium/drivers/radeon/SIMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- SIMachineFunctionInfo.h - TODO: Add brief description -------===// +//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config +// register, which is to tell the hardware which interpolation parameters to +// load. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp index 4f925d5de1c..6a165488831 100644 --- a/src/gallium/drivers/radeon/SIPropagateImmReads.cpp +++ b/src/gallium/drivers/radeon/SIPropagateImmReads.cpp @@ -1,4 +1,4 @@ -//===-- SIPropagateImmReads.cpp - TODO: Add brief description -------===// +//===-- SIPropagateImmReads.cpp - Lower Immediate Reads Pass --------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,9 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// We can't do this in the ConvertToISA pass, because later passes might +// create LOADCONST_* instructions that we would miss. This is why we need +// a separate pass for this. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.cpp b/src/gallium/drivers/radeon/SIRegisterInfo.cpp index da2ec36a773..2d530a4f022 100644 --- a/src/gallium/drivers/radeon/SIRegisterInfo.cpp +++ b/src/gallium/drivers/radeon/SIRegisterInfo.cpp @@ -1,4 +1,4 @@ -//===-- SIRegisterInfo.cpp - TODO: Add brief description -------===// +//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// This file contains the SI implementation of the TargetRegisterInfo class. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SIRegisterInfo.h b/src/gallium/drivers/radeon/SIRegisterInfo.h index c797e3c8ace..77f3261efc5 100644 --- a/src/gallium/drivers/radeon/SIRegisterInfo.h +++ b/src/gallium/drivers/radeon/SIRegisterInfo.h @@ -1,4 +1,4 @@ -//===-- SIRegisterInfo.h - TODO: Add brief description -------===// +//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// Interface definition for SIRegisterInfo // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/SISchedule.td b/src/gallium/drivers/radeon/SISchedule.td index 9e99268e9ca..28b65b82585 100644 --- a/src/gallium/drivers/radeon/SISchedule.td +++ b/src/gallium/drivers/radeon/SISchedule.td @@ -1,4 +1,4 @@ -//===-- SISchedule.td - TODO: Add brief description -------===// +//===-- SISchedule.td - SI Scheduling definitons -------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// TODO: Add full description +// TODO: This is just a place holder for now. // //===----------------------------------------------------------------------===// diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 9be7f90c3e6..4a706397fdd 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -36,6 +36,8 @@ #define RADEON_LLVM_MAX_BRANCH_DEPTH 16 #define RADEON_LLVM_MAX_LOOP_DEPTH 16 +#define RADEON_LLVM_MAX_SYSTEM_VALUES 4 + struct radeon_llvm_branch { LLVMBasicBlockRef endif_block; LLVMBasicBlockRef if_block; @@ -78,6 +80,9 @@ struct radeon_llvm_context { unsigned input_index, const struct tgsi_full_declaration *decl); + void (*load_system_value)(struct radeon_llvm_context *, + unsigned index, + const struct tgsi_full_declaration *decl); /** User data to use with the callbacks */ void * userdata; @@ -90,6 +95,8 @@ struct radeon_llvm_context { LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS]; unsigned output_reg_count; + LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES]; + unsigned reserved_reg_count; /*=== Private Members ===*/ @@ -105,6 +112,37 @@ struct radeon_llvm_context { struct gallivm_state gallivm; }; +static inline LLVMValueRef bitcast( + struct lp_build_tgsi_context * bld_base, + enum tgsi_opcode_type type, + LLVMValueRef value +) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef ctx = bld_base->base.gallivm->context; + LLVMTypeRef dst_type; + + switch (type) { + case TGSI_TYPE_UNSIGNED: + case TGSI_TYPE_SIGNED: + dst_type = LLVMInt32TypeInContext(ctx); + break; + case TGSI_TYPE_UNTYPED: + case TGSI_TYPE_FLOAT: + dst_type = LLVMFloatTypeInContext(ctx); + break; + default: + dst_type = 0; + break; + } + + if (dst_type) + return LLVMBuildBitCast(builder, value, dst_type, ""); + else + return value; +} + + void radeon_llvm_context_init(struct radeon_llvm_context * ctx); void radeon_llvm_dispose(struct radeon_llvm_context * ctx); diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp index b409cb2175e..ebc32106b52 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.cpp +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.cpp @@ -93,19 +93,20 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes, AMDGPUTriple.setArch(Arch); Module * mod = unwrap(M); - std::string FS = gpu_family; + std::string FS; TargetOptions TO; + if (dump) { + mod->dump(); + FS += "+DumpCode"; + } + std::auto_ptr<TargetMachine> tm(AMDGPUTarget->createTargetMachine( - AMDGPUTriple.getTriple(), gpu_family, "" /* Features */, + AMDGPUTriple.getTriple(), gpu_family, FS, TO, Reloc::Default, CodeModel::Default, CodeGenOpt::Default )); TargetMachine &AMDGPUTargetMachine = *tm.get(); - /* XXX: Use TargetMachine.Options in 3.0 */ - if (dump) { - mod->dump(); - } PassManager PM; PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData())); PM.add(createPromoteMemoryToRegisterPass()); diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index 62de9da28de..6e6fc3d12cd 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -29,6 +29,7 @@ #include "gallivm/lp_bld_gather.h" #include "gallivm/lp_bld_flow.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_swizzle.h" #include "tgsi/tgsi_info.h" #include "tgsi/tgsi_parse.h" @@ -112,8 +113,25 @@ emit_fetch_immediate( enum tgsi_opcode_type type, unsigned swizzle) { + LLVMTypeRef ctype; + LLVMContextRef ctx = bld_base->base.gallivm->context; + + switch (type) { + case TGSI_TYPE_UNSIGNED: + case TGSI_TYPE_SIGNED: + ctype = LLVMInt32TypeInContext(ctx); + break; + case TGSI_TYPE_UNTYPED: + case TGSI_TYPE_FLOAT: + ctype = LLVMFloatTypeInContext(ctx); + break; + default: + ctype = 0; + break; + } + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); - return bld->immediates[reg->Register.Index][swizzle]; + return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); } static LLVMValueRef @@ -134,7 +152,7 @@ emit_fetch_input( return lp_build_gather_values(bld_base->base.gallivm, values, TGSI_NUM_CHANNELS); } else { - return ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; + return bitcast(bld_base, type, ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]); } } @@ -155,7 +173,7 @@ emit_fetch_temporary( } else { LLVMValueRef temp_ptr; temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); - return LLVMBuildLoad(builder, temp_ptr, ""); + return bitcast(bld_base,type,LLVMBuildLoad(builder, temp_ptr, "")); } } @@ -213,6 +231,15 @@ static void emit_declaration( } break; + case TGSI_FILE_SYSTEM_VALUE: + { + unsigned idx; + for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) { + ctx->load_system_value(ctx, idx, decl); + } + } + break; + case TGSI_FILE_OUTPUT: { unsigned idx; @@ -304,6 +331,9 @@ emit_store( default: return; } + + value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); + LLVMBuildStore(builder, value, temp_ptr); } } @@ -444,8 +474,10 @@ static void if_emit( struct gallivm_state * gallivm = bld_base->base.gallivm; LLVMValueRef cond; LLVMBasicBlockRef if_block, else_block, endif_block; - cond = LLVMBuildFCmp(gallivm->builder, LLVMRealOEQ, emit_data->args[0], - bld_base->base.one, ""); + + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, + bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]), + bld_base->int_bld.zero, ""); endif_block = LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, "ENDIF"); @@ -463,6 +495,101 @@ static void if_emit( ctx->branch[ctx->branch_depth - 1].has_else = 0; } +static void kil_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + unsigned i; + for (i = 0; i < emit_data->arg_count; i++) { + emit_data->output[i] = lp_build_intrinsic_unary( + bld_base->base.gallivm->builder, + action->intr_name, + emit_data->dst_type, emit_data->args[i]); + } +} + + +static void emit_prepare_cube_coords( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + boolean shadowcube = (emit_data->inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE); + struct gallivm_state * gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef type = bld_base->base.elem_type; + LLVMValueRef coords[4]; + LLVMValueRef mad_args[3]; + unsigned i, cnt; + + LLVMValueRef v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", + LLVMVectorType(type, 4), + &emit_data->args[0],1); + + /* save src.w for shadow cube */ + cnt = shadowcube ? 3 : 4; + + for (i = 0; i < cnt; ++i) { + LLVMValueRef idx = lp_build_const_int32(gallivm, i); + coords[i] = LLVMBuildExtractElement(builder, v, idx, ""); + } + + coords[2] = lp_build_intrinsic(builder, "llvm.AMDIL.fabs.", + type, &coords[2], 1); + coords[2] = lp_build_intrinsic(builder, "llvm.AMDGPU.rcp", + type, &coords[2], 1); + + mad_args[1] = coords[2]; + mad_args[2] = LLVMConstReal(type, 1.5); + + mad_args[0] = coords[0]; + coords[0] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.", + type, mad_args, 3); + + mad_args[0] = coords[1]; + coords[1] = lp_build_intrinsic(builder, "llvm.AMDIL.mad.", + type, mad_args, 3); + + /* apply yxwy swizzle to cooords */ + coords[2] = coords[3]; + coords[3] = coords[1]; + coords[1] = coords[0]; + coords[0] = coords[3]; + + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); +} + +static void txp_fetch_args( + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + const struct tgsi_full_instruction * inst = emit_data->inst; + LLVMValueRef src_w; + unsigned chan; + LLVMValueRef coords[4]; + + emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); + + for (chan = 0; chan < 3; chan++ ) { + LLVMValueRef arg = lp_build_emit_fetch(bld_base, + emit_data->inst, 0, chan); + coords[chan] = lp_build_emit_llvm_binary(bld_base, + TGSI_OPCODE_DIV, arg, src_w); + } + coords[3] = bld_base->base.one; + emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, + coords, 4); + emit_data->arg_count = 1; + + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { + emit_prepare_cube_coords(bld_base, emit_data); + } +} + static void tex_fetch_args( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data) @@ -475,16 +602,261 @@ static void tex_fetch_args( */ + const struct tgsi_full_instruction * inst = emit_data->inst; + LLVMValueRef coords[4]; unsigned chan; for (chan = 0; chan < 4; chan++) { - coords[chan] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, chan); + coords[chan] = lp_build_emit_fetch(bld_base, inst, 0, chan); } emit_data->arg_count = 1; emit_data->args[0] = lp_build_gather_values(bld_base->base.gallivm, coords, 4); emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); + + if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || + inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) && + inst->Instruction.Opcode != TGSI_OPCODE_TXQ) { + emit_prepare_cube_coords(bld_base, emit_data); + } +} + +static void emit_icmp( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + unsigned pred; + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef context = bld_base->base.gallivm->context; + + switch (emit_data->inst->Instruction.Opcode) { + case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break; + case TGSI_OPCODE_USNE: pred = LLVMIntNE; break; + case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break; + case TGSI_OPCODE_USLT: pred = LLVMIntULT; break; + case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break; + case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break; + default: + assert(!"unknown instruction"); + } + + LLVMValueRef v = LLVMBuildICmp(builder, pred, + emit_data->args[0], emit_data->args[1],""); + + v = LLVMBuildSExtOrBitCast(builder, v, + LLVMInt32TypeInContext(context), ""); + + emit_data->output[emit_data->chan] = v; +} + +static void emit_not( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, + emit_data->args[0]); + emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, ""); +} + +static void emit_and( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildAnd(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_or( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildOr(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_uadd( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildAdd(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_udiv( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_idiv( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_mod( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildSRem(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_umod( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildURem(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_shl( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildShl(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_ushr( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildLShr(builder, + emit_data->args[0], emit_data->args[1], ""); +} +static void emit_ishr( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildAShr(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_xor( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildXor(builder, + emit_data->args[0], emit_data->args[1], ""); +} + +static void emit_ssg( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + + LLVMValueRef cmp, val; + + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) { + cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], ""); + cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), ""); + } else { // float SSG + cmp = LLVMBuildFCmp(builder, LLVMRealUGT, emit_data->args[0], bld_base->int_bld.zero, ""); + val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], ""); + cmp = LLVMBuildFCmp(builder, LLVMRealUGE, val, bld_base->base.zero, ""); + val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), ""); + } + + emit_data->output[emit_data->chan] = val; +} + +static void emit_ineg( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildNeg(builder, + emit_data->args[0], ""); +} + +static void emit_f2i( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder, + emit_data->args[0], bld_base->int_bld.elem_type, ""); +} + +static void emit_f2u( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder, + emit_data->args[0], bld_base->uint_bld.elem_type, ""); +} + +static void emit_i2f( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder, + emit_data->args[0], bld_base->base.elem_type, ""); +} + +static void emit_u2f( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder, + emit_data->args[0], bld_base->base.elem_type, ""); +} + +static void emit_immediate(struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_immediate *imm) +{ + unsigned i; + struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base); + + for (i = 0; i < 4; ++i) { + ctx->soa.immediates[ctx->soa.num_immediates][i] = + LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false ); + } + + ctx->soa.num_immediates++; } void radeon_llvm_context_init(struct radeon_llvm_context * ctx) @@ -526,12 +898,13 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) lp_build_context_init(&bld_base->base, &ctx->gallivm, type); lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); + lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); bld_base->soa = 1; bld_base->emit_store = emit_store; bld_base->emit_swizzle = emit_swizzle; bld_base->emit_declaration = emit_declaration; - bld_base->emit_immediate = lp_emit_immediate_soa; + bld_base->emit_immediate = emit_immediate; bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; @@ -545,6 +918,60 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) lp_set_default_actions(bld_base); + bld_base->op_actions[TGSI_OPCODE_IABS].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IABS].intr_name = "llvm.AMDIL.abs."; + bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not; + bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and; + bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor; + bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or; + bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd; + bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv; + bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv; + bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod; + bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod; + bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg; + bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl; + bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr; + bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr; + bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg; + bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg; + bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f; + bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f; + bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i; + bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u; + bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; + bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; + bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp; + bld_base->op_actions[TGSI_OPCODE_ROUND].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.AMDIL.round.nearest."; + bld_base->op_actions[TGSI_OPCODE_MIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MIN].intr_name = "llvm.AMDIL.min."; + bld_base->op_actions[TGSI_OPCODE_MAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_MAX].intr_name = "llvm.AMDIL.max."; + bld_base->op_actions[TGSI_OPCODE_IMIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IMIN].intr_name = "llvm.AMDGPU.imin"; + bld_base->op_actions[TGSI_OPCODE_IMAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_IMAX].intr_name = "llvm.AMDGPU.imax"; + bld_base->op_actions[TGSI_OPCODE_UMIN].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_UMIN].intr_name = "llvm.AMDGPU.umin"; + bld_base->op_actions[TGSI_OPCODE_UMAX].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_UMAX].intr_name = "llvm.AMDGPU.umax"; + bld_base->op_actions[TGSI_OPCODE_TXF].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXF].intr_name = "llvm.AMDGPU.txf"; + bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_TXQ].intr_name = "llvm.AMDGPU.txq"; + bld_base->op_actions[TGSI_OPCODE_CEIL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.AMDIL.round.neginf."; + + + bld_base->op_actions[TGSI_OPCODE_ABS].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.AMDIL.fabs."; bld_base->op_actions[TGSI_OPCODE_ARL].emit = lp_build_tgsi_intrinsic; @@ -558,10 +985,6 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_CMP].intr_name = "llvm.AMDGPU.cndlt"; bld_base->op_actions[TGSI_OPCODE_COS].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.AMDGPU.cos"; - bld_base->op_actions[TGSI_OPCODE_DDX].emit = lp_build_tgsi_intrinsic; - bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; - bld_base->op_actions[TGSI_OPCODE_DDY].emit = lp_build_tgsi_intrinsic; - bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; bld_base->op_actions[TGSI_OPCODE_DIV].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_DIV].intr_name = "llvm.AMDGPU.div"; bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; @@ -574,7 +997,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_FRC].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_FRC].intr_name = "llvm.AMDIL.fraction."; bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit; - bld_base->op_actions[TGSI_OPCODE_KIL].emit = lp_build_tgsi_intrinsic; + bld_base->op_actions[TGSI_OPCODE_KIL].emit = kil_emit; bld_base->op_actions[TGSI_OPCODE_KIL].intr_name = "llvm.AMDGPU.kill"; bld_base->op_actions[TGSI_OPCODE_KILP].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_KILP].intr_name = "llvm.AMDGPU.kilp"; @@ -597,7 +1020,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_SSG].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_SSG].intr_name = "llvm.AMDGPU.ssg"; bld_base->op_actions[TGSI_OPCODE_SGE].emit = lp_build_tgsi_intrinsic; - bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge."; + bld_base->op_actions[TGSI_OPCODE_SGE].intr_name = "llvm.AMDGPU.sge"; bld_base->op_actions[TGSI_OPCODE_SEQ].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_SEQ].intr_name = "llvm.AMDGPU.seq"; bld_base->op_actions[TGSI_OPCODE_SLE].fetch_args = radeon_llvm_fetch_args_2_reverse_soa; @@ -620,6 +1043,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_TXD].intr_name = "llvm.AMDGPU.txd"; bld_base->op_actions[TGSI_OPCODE_TXL].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXL].intr_name = "llvm.AMDGPU.txl"; + bld_base->op_actions[TGSI_OPCODE_TXP].fetch_args = txp_fetch_args; bld_base->op_actions[TGSI_OPCODE_TXP].intr_name = "llvm.AMDGPU.tex"; bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = lp_build_tgsi_intrinsic; bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.AMDGPU.trunc"; diff --git a/src/gallium/drivers/radeonsi/evergreen_state.c b/src/gallium/drivers/radeonsi/evergreen_state.c index 75d6cadc6cc..b094248fee1 100644 --- a/src/gallium/drivers/radeonsi/evergreen_state.c +++ b/src/gallium/drivers/radeonsi/evergreen_state.c @@ -1166,24 +1166,6 @@ static void si_delete_sampler_state(struct pipe_context *ctx, free(state); } -static unsigned si_map_swizzle(unsigned swizzle) -{ - switch (swizzle) { - case UTIL_FORMAT_SWIZZLE_Y: - return V_008F1C_SQ_SEL_Y; - case UTIL_FORMAT_SWIZZLE_Z: - return V_008F1C_SQ_SEL_Z; - case UTIL_FORMAT_SWIZZLE_W: - return V_008F1C_SQ_SEL_W; - case UTIL_FORMAT_SWIZZLE_0: - return V_008F1C_SQ_SEL_0; - case UTIL_FORMAT_SWIZZLE_1: - return V_008F1C_SQ_SEL_1; - default: /* UTIL_FORMAT_SWIZZLE_X */ - return V_008F1C_SQ_SEL_X; - } -} - static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *texture, const struct pipe_sampler_view *state) @@ -1259,9 +1241,9 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte va = r600_resource_va(ctx->screen, texture); view->state[0] = (va + tmp->offset[0]) >> 8; - view->state[1] = ((va + tmp->offset[0]) >> 40) & 0xff; - view->state[1] |= (S_008F14_DATA_FORMAT(format) | - S_008F14_NUM_FORMAT(num_format)); + view->state[1] = (S_008F14_BASE_ADDRESS_HI((va + tmp->offset[0]) >> 40) | + S_008F14_DATA_FORMAT(format) | + S_008F14_NUM_FORMAT(num_format)); view->state[2] = (S_008F18_WIDTH(texture->width0 - 1) | S_008F18_HEIGHT(height - 1)); view->state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | @@ -2087,9 +2069,9 @@ void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *shader) r600_pipe_state_add_reg(rstate, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP), + S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) | + S_02870C_POS2_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) | + S_02870C_POS3_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE), NULL, 0); va = r600_resource_va(ctx->screen, (void *)shader->bo); diff --git a/src/gallium/drivers/radeonsi/r600_state_common.c b/src/gallium/drivers/radeonsi/r600_state_common.c index 53a34ef519c..06eb96b9ee8 100644 --- a/src/gallium/drivers/radeonsi/r600_state_common.c +++ b/src/gallium/drivers/radeonsi/r600_state_common.c @@ -628,12 +628,15 @@ static void r600_vertex_buffer_update(struct r600_context *rctx) ptr[0] = va & 0xFFFFFFFF; ptr[1] = (S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vertex_buffer->stride)); - ptr[2] = (vertex_buffer->buffer->width0 - offset) / vertex_buffer->stride; - /* XXX: Hardcoding RGBA */ - ptr[3] = (S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | - S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | - S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + if (vertex_buffer->stride > 0) + ptr[2] = ((vertex_buffer->buffer->width0 - offset) / + vertex_buffer->stride); + else + ptr[2] = vertex_buffer->buffer->width0 - offset; + ptr[3] = (S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | + S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | + S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | + S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) | S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format)); diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index ab30892d51a..bba4cf23691 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -37,6 +37,7 @@ #include "r600.h" #include "radeonsi_public.h" #include "r600_resource.h" +#include "sid.h" #define R600_MAX_CONST_BUFFERS 1 #define R600_MAX_CONST_BUFFER_SIZE 4096 @@ -467,6 +468,24 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits) } #define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) +static INLINE unsigned si_map_swizzle(unsigned swizzle) +{ + switch (swizzle) { + case UTIL_FORMAT_SWIZZLE_Y: + return V_008F0C_SQ_SEL_Y; + case UTIL_FORMAT_SWIZZLE_Z: + return V_008F0C_SQ_SEL_Z; + case UTIL_FORMAT_SWIZZLE_W: + return V_008F0C_SQ_SEL_W; + case UTIL_FORMAT_SWIZZLE_0: + return V_008F0C_SQ_SEL_0; + case UTIL_FORMAT_SWIZZLE_1: + return V_008F0C_SQ_SEL_1; + default: /* UTIL_FORMAT_SWIZZLE_X */ + return V_008F0C_SQ_SEL_X; + } +} + static inline unsigned r600_tex_aniso_filter(unsigned filter) { if (filter <= 1) return 0; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 6425c352d28..0e1a97bba3e 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -199,7 +199,7 @@ static void declare_input_fs( LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index); /* XXX: Handle all possible interpolation modes */ - switch (decl->Declaration.Interpolate) { + switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_COLOR: if (si_shader_ctx->rctx->rasterizer->flatshade) intr_name = "llvm.SI.fs.interp.constant"; @@ -331,14 +331,14 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) i = shader->ninput++; shader->input[i].name = d->Semantic.Name; shader->input[i].sid = d->Semantic.Index; - shader->input[i].interpolate = d->Declaration.Interpolate; - shader->input[i].centroid = d->Declaration.Centroid; + shader->input[i].interpolate = d->Interp.Interpolate; + shader->input[i].centroid = d->Interp.Centroid; break; case TGSI_FILE_OUTPUT: i = shader->noutput++; shader->output[i].name = d->Semantic.Name; shader->output[i].sid = d->Semantic.Index; - shader->output[i].interpolate = d->Declaration.Interpolate; + shader->output[i].interpolate = d->Interp.Interpolate; break; } diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index d54e02e40cd..d4c01759dbe 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -799,7 +799,8 @@ get_texel_2d_array(const struct sp_sampler_variant *samp, const struct pipe_resource *texture = samp->view->texture; unsigned level = addr.bits.level; - assert(layer < texture->array_size); + assert(layer < (int) texture->array_size); + assert(layer >= 0); if (x < 0 || x >= (int) u_minify(texture->width0, level) || y < 0 || y >= (int) u_minify(texture->height0, level)) { @@ -1787,9 +1788,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, float weight_buffer[TGSI_QUAD_SIZE]; unsigned buffer_next; int j; - float den;// = 0.0F; + float den; /* = 0.0F; */ float ddq; - float U;// = u0 - tex_u; + float U; /* = u0 - tex_u; */ int v; /* Scale ellipse formula to directly index the Filter Lookup Table. @@ -1805,8 +1806,8 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, * also the same. Note that texel/image access can only be performed using * a quad, i.e. it is not possible to get the pixel value for a single * tex coord. In order to have a better performance, the access is buffered - * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is full, - * then the pixel values are read from the image. + * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is + * full, then the pixel values are read from the image. */ ddq = 2 * A; @@ -1834,7 +1835,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, int u; for (u = u0; u <= u1; ++u) { - /* Note that the ellipse has been pre-scaled so F = WEIGHT_LUT_SIZE - 1 */ + /* Note that the ellipse has been pre-scaled so F = + * WEIGHT_LUT_SIZE - 1 + */ if (q < WEIGHT_LUT_SIZE) { /* as a LUT is used, q must never be negative; * should not happen, though @@ -1873,10 +1876,11 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, } } - /* if the tex coord buffer contains unread values, we will read them now. - * Note that in most cases we have to read more pixel values than required, - * however, as the img_filter_2d_nearest function(s) does not have a count - * parameter, we need to read the whole quad and ignore the unused values + /* if the tex coord buffer contains unread values, we will read + * them now. Note that in most cases we have to read more pixel + * values than required, however, as the img_filter_2d_nearest + * function(s) does not have a count parameter, we need to read + * the whole quad and ignore the unused values */ if (buffer_next > 0) { unsigned jj; @@ -1895,11 +1899,9 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, } if (den <= 0.0F) { - /* Reaching this place would mean - * that no pixels intersected the ellipse. - * This should never happen because - * the filter we use always - * intersects at least one pixel. + /* Reaching this place would mean that no pixels intersected + * the ellipse. This should never happen because the filter + * we use always intersects at least one pixel. */ /*rgba[0]=0; @@ -1907,7 +1909,8 @@ img_filter_2d_ewa(struct tgsi_sampler *tgsi_sampler, rgba[2]=0; rgba[3]=0;*/ /* not enough pixels in resampling, resort to direct interpolation */ - samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba_temp); + samp->min_img_filter(tgsi_sampler, s, t, p, NULL, + tgsi_sampler_lod_bias, rgba_temp); den = 1; num[0] = rgba_temp[0][j]; num[1] = rgba_temp[1][j]; @@ -2020,7 +2023,6 @@ mip_filter_linear_aniso(struct tgsi_sampler *tgsi_sampler, } - /** * Specialized version of mip_filter_linear with hard-wired calls to * 2d lambda calculation and 2d_linear_repeat_POT img filters. @@ -2090,7 +2092,6 @@ mip_filter_linear_2d_linear_repeat_POT( } - /** * Do shadow/depth comparisons. */ @@ -2287,9 +2288,11 @@ sample_cube(struct tgsi_sampler *tgsi_sampler, samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba); } -static void do_swizzling(const struct sp_sampler_variant *samp, - float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) + +static void +do_swizzling(const struct sp_sampler_variant *samp, + float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], + float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { int j; const unsigned swizzle_r = samp->key.bits.swizzle_r; @@ -2358,6 +2361,7 @@ static void do_swizzling(const struct sp_sampler_variant *samp, } } + static void sample_swizzle(struct tgsi_sampler *tgsi_sampler, const float s[TGSI_QUAD_SIZE], @@ -2464,6 +2468,19 @@ get_linear_wrap(unsigned mode) } +/** + * Is swizzling needed for the given state key? + */ +static INLINE bool +any_swizzle(union sp_sampler_key key) +{ + return (key.bits.swizzle_r != PIPE_SWIZZLE_RED || + key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || + key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || + key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA); +} + + static compute_lambda_func get_lambda_func(const union sp_sampler_key key) { @@ -2590,6 +2607,7 @@ sp_sampler_variant_destroy( struct sp_sampler_variant *samp ) FREE(samp); } + static void sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level, int dims[4]) @@ -2630,35 +2648,43 @@ sample_get_dims(struct tgsi_sampler *tgsi_sampler, int level, } } -/* this function is only used for unfiltered texel gets - via the TGSI TXF opcode. */ +/** + * This function is only used for getting unfiltered texels via the + * TXF opcode. The GL spec says that out-of-bounds texel fetches + * produce undefined results. Instead of crashing, lets just clamp + * coords to the texture image size. + */ static void sample_get_texels(struct tgsi_sampler *tgsi_sampler, - const int v_i[TGSI_QUAD_SIZE], - const int v_j[TGSI_QUAD_SIZE], - const int v_k[TGSI_QUAD_SIZE], - const int lod[TGSI_QUAD_SIZE], - const int8_t offset[3], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) + const int v_i[TGSI_QUAD_SIZE], + const int v_j[TGSI_QUAD_SIZE], + const int v_k[TGSI_QUAD_SIZE], + const int lod[TGSI_QUAD_SIZE], + const int8_t offset[3], + float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) { const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler); union tex_tile_address addr; const struct pipe_resource *texture = samp->view->texture; int j, c; const float *tx; - bool need_swizzle = (samp->key.bits.swizzle_r != PIPE_SWIZZLE_RED || - samp->key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || - samp->key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || - samp->key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA); + const bool need_swizzle = any_swizzle(samp->key); + int width, height, depth, layers; addr.value = 0; /* TODO write a better test for LOD */ addr.bits.level = lod[0]; + width = u_minify(texture->width0, addr.bits.level); + height = u_minify(texture->height0, addr.bits.level); + depth = u_minify(texture->depth0, addr.bits.level); + layers = texture->array_size; + switch(texture->target) { case PIPE_TEXTURE_1D: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_2d(samp, addr, v_i[j] + offset[0], 0); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + tx = get_texel_2d(samp, addr, x, 0); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2666,8 +2692,9 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, break; case PIPE_TEXTURE_1D_ARRAY: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_1d_array(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, layers - 1); + tx = get_texel_1d_array(samp, addr, x, y); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2676,8 +2703,9 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, case PIPE_TEXTURE_2D: case PIPE_TEXTURE_RECT: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_2d(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, height - 1); + tx = get_texel_2d(samp, addr, x, y); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2685,9 +2713,10 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, break; case PIPE_TEXTURE_2D_ARRAY: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_2d_array(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1], - v_k[j] + offset[2]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, height - 1); + int layer = CLAMP(v_k[j] + offset[2], 0, layers - 1); + tx = get_texel_2d_array(samp, addr, x, y, layer); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2695,9 +2724,11 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, break; case PIPE_TEXTURE_3D: for (j = 0; j < TGSI_QUAD_SIZE; j++) { - tx = get_texel_3d(samp, addr, v_i[j] + offset[0], - v_j[j] + offset[1], - v_k[j] + offset[2]); + int x = CLAMP(v_i[j] + offset[0], 0, width - 1); + int y = CLAMP(v_j[j] + offset[1], 0, height - 1); + int z = CLAMP(v_k[j] + offset[2], 0, depth - 1); + + tx = get_texel_3d(samp, addr, x, y, z); for (c = 0; c < 4; c++) { rgba[c][j] = tx[c]; } @@ -2715,6 +2746,8 @@ sample_get_texels(struct tgsi_sampler *tgsi_sampler, do_swizzling(samp, rgba_temp, rgba); } } + + /** * Create a sampler variant for a given set of non-orthogonal state. */ @@ -2830,10 +2863,7 @@ sp_create_sampler_variant( const struct pipe_sampler_state *sampler, samp->sample_target = samp->compare; } - if (key.bits.swizzle_r != PIPE_SWIZZLE_RED || - key.bits.swizzle_g != PIPE_SWIZZLE_GREEN || - key.bits.swizzle_b != PIPE_SWIZZLE_BLUE || - key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) { + if (any_swizzle(key)) { samp->base.get_samples = sample_swizzle; } else { diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c index ac2d35e5ea4..64ec658b80e 100644 --- a/src/gallium/drivers/svga/svga_screen.c +++ b/src/gallium/drivers/svga/svga_screen.c @@ -241,7 +241,11 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param) case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS: case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS: case PIPE_CAP_GLSL_FEATURE_LEVEL: + case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY: + case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY: return 0; + case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY: + return 1; default: debug_printf("Unexpected PIPE_CAP_ query %u\n", param); diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c index 5e6d1fbc904..a68912608bc 100644 --- a/src/gallium/drivers/svga/svga_tgsi_insn.c +++ b/src/gallium/drivers/svga/svga_tgsi_insn.c @@ -871,6 +871,31 @@ static boolean emit_floor(struct svga_shader_emitter *emit, } +/* Translate the following TGSI CEIL instruction. + * CEIL DST, SRC + * To the following SVGA3D instruction sequence. + * FRC TMP, -SRC + * ADD DST, SRC, TMP + */ +static boolean emit_ceil(struct svga_shader_emitter *emit, + const struct tgsi_full_instruction *insn) +{ + SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); + const struct src_register src0 = translate_src_register(emit, &insn->Src[0]); + SVGA3dShaderDestToken temp = get_temp(emit); + + /* FRC TMP, -SRC */ + if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0))) + return FALSE; + + /* ADD DST, SRC, TMP */ + if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp))) + return FALSE; + + return TRUE; +} + + /* Translate the following TGSI CMP instruction. * CMP DST, SRC0, SRC1, SRC2 * To the following SVGA3D instruction sequence. @@ -2435,6 +2460,9 @@ static boolean svga_emit_instruction( struct svga_shader_emitter *emit, case TGSI_OPCODE_TRUNC: /* should be TRUNC, not FLR */ return emit_floor( emit, insn ); + case TGSI_OPCODE_CEIL: + return emit_ceil( emit, insn ); + case TGSI_OPCODE_CMP: return emit_cmp( emit, insn ); |