diff options
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 355 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.c | 24 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm.h | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_wm_state.c | 18 |
4 files changed, 381 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9a6ee7a0100..5dada65909d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -39,6 +39,20 @@ extern "C" { #include "../glsl/ir_optimization.h" #include "../glsl/ir_print_visitor.h" +enum register_file { + ARF = BRW_ARCHITECTURE_REGISTER_FILE, + GRF = BRW_GENERAL_REGISTER_FILE, + MRF = BRW_MESSAGE_REGISTER_FILE, + IMM = BRW_IMMEDIATE_VALUE, + BAD_FILE +}; + +enum fs_opcodes { + FS_OPCODE_FB_WRITE = 256, +}; + +static int using_new_fs = -1; + struct gl_shader * brw_new_shader(GLcontext *ctx, GLuint name, GLuint type) { @@ -77,18 +91,31 @@ brw_compile_shader(GLcontext *ctx, struct gl_shader *shader) GLboolean brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) { - static int using_new_fs = -1; - if (using_new_fs == -1) using_new_fs = getenv("INTEL_NEW_FS") != NULL; for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) { - struct gl_shader *shader = prog->_LinkedShaders[i]; + struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i]; + + if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) { + void *mem_ctx = talloc_new(NULL); + bool progress; + + shader->ir = new(shader) exec_list; + clone_ir_list(mem_ctx, shader->ir, shader->base.ir); - if (using_new_fs && shader->Type == GL_FRAGMENT_SHADER) { do_mat_op_to_vec(shader->ir); brw_do_channel_expressions(shader->ir); brw_do_vector_splitting(shader->ir); + + do { + progress = false; + + progress = do_common_optimization(shader->ir, true) || progress; + } while (progress); + + reparent_ir(shader->ir, shader); + talloc_free(mem_ctx); } } @@ -97,3 +124,323 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog) return GL_TRUE; } + +class fs_reg { +public: + fs_reg() + { + this->file = BAD_FILE; + this->reg = 0; + this->hw_reg = -1; + } + + fs_reg(float f) + { + this->file = IMM; + this->reg = 0; + this->hw_reg = 0; + this->type = BRW_REGISTER_TYPE_F; + this->imm.f = f; + } + + fs_reg(int32_t i) + { + this->file = IMM; + this->reg = 0; + this->hw_reg = 0; + this->type = BRW_REGISTER_TYPE_D; + this->imm.i = i; + } + + fs_reg(uint32_t u) + { + this->file = IMM; + this->reg = 0; + this->hw_reg = 0; + this->type = BRW_REGISTER_TYPE_UD; + this->imm.u = u; + } + + fs_reg(enum register_file file, int hw_reg) + { + this->file = file; + this->reg = 0; + this->hw_reg = hw_reg; + this->type = BRW_REGISTER_TYPE_F; + } + + /** Register file: ARF, GRF, MRF, IMM. */ + enum register_file file; + /** Abstract register number. 0 = fixed hw reg */ + int reg; + /** HW register number. Generally unset until register allocation. */ + int hw_reg; + /** Register type. BRW_REGISTER_TYPE_* */ + int type; + + /** Value for file == BRW_IMMMEDIATE_FILE */ + union { + int32_t i; + uint32_t u; + float f; + } imm; +}; + +static const fs_reg reg_undef(BAD_FILE, -1); +static const fs_reg reg_null(ARF, BRW_ARF_NULL); + +class fs_inst : public exec_node { +public: + /* Callers of this talloc-based new need not call delete. It's + * easier to just talloc_free 'ctx' (or any of its ancestors). */ + static void* operator new(size_t size, void *ctx) + { + void *node; + + node = talloc_size(ctx, size); + assert(node != NULL); + + return node; + } + + fs_inst() + { + this->opcode = BRW_OPCODE_NOP; + this->dst = reg_undef; + this->src[0] = reg_undef; + this->src[1] = reg_undef; + } + fs_inst(int opcode, fs_reg dst, fs_reg src0) + { + this->opcode = opcode; + this->dst = dst; + this->src[0] = src0; + this->src[1] = reg_undef; + } + fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + { + this->opcode = opcode; + this->dst = dst; + this->src[0] = src0; + this->src[1] = src1; + } + + int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + fs_reg dst; + fs_reg src[2]; +}; + +class fs_visitor : public ir_hierarchical_visitor +{ +public: + + fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader) + { + this->c = c; + this->p = &c->func; + this->mem_ctx = talloc_new(NULL); + this->shader = shader; + } + ~fs_visitor() + { + talloc_free(this->mem_ctx); + } + + fs_inst *emit(fs_inst inst); + void generate_code(); + void generate_fb_write(fs_inst *inst); + + void emit_dummy_fs(); + + struct brw_wm_compile *c; + struct brw_compile *p; + struct brw_shader *shader; + void *mem_ctx; + exec_list instructions; + + int grf_used; + +}; + +fs_inst * +fs_visitor::emit(fs_inst inst) +{ + fs_inst *list_inst = new(mem_ctx) fs_inst; + *list_inst = inst; + + this->instructions.push_tail(list_inst); + + return list_inst; +} + +/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ +void +fs_visitor::emit_dummy_fs() +{ + /* Everyone's favorite color. */ + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 2), + fs_reg(1.0f))); + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 3), + fs_reg(0.0f))); + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 4), + fs_reg(1.0f))); + emit(fs_inst(BRW_OPCODE_MOV, + fs_reg(MRF, 5), + fs_reg(0.0f))); + + fs_inst *write; + write = emit(fs_inst(FS_OPCODE_FB_WRITE, + fs_reg(0), + fs_reg(0))); +} + +void +fs_visitor::generate_fb_write(fs_inst *inst) +{ + GLboolean eot = 1; /* FINISHME: MRT */ + /* FINISHME: AADS */ + + /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied + * move, here's g1. + */ + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, + brw_message_reg(1), + brw_vec8_grf(1, 0)); + brw_pop_insn_state(p); + + int nr = 2 + 4; + + brw_fb_WRITE(p, + 8, /* dispatch_width */ + retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW), + 0, /* base MRF */ + retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW), + 0, /* FINISHME: MRT target */ + nr, + 0, + eot); +} + +void +fs_visitor::generate_code() +{ + this->grf_used = 2; /* header */ + + foreach_iter(exec_list_iterator, iter, this->instructions) { + fs_inst *inst = (fs_inst *)iter.get(); + struct brw_reg src[2], dst; + + for (unsigned int i = 0; i < 2; i++) { + switch (inst->src[i].file) { + case GRF: + case ARF: + case MRF: + src[i] = brw_vec8_reg(inst->src[i].file, + inst->src[i].hw_reg, 0); + src[i] = retype(src[i], inst->src[i].type); + break; + case IMM: + switch (inst->src[i].type) { + case BRW_REGISTER_TYPE_F: + src[i] = brw_imm_f(inst->src[i].imm.f); + break; + case BRW_REGISTER_TYPE_D: + src[i] = brw_imm_f(inst->src[i].imm.i); + break; + case BRW_REGISTER_TYPE_UD: + src[i] = brw_imm_f(inst->src[i].imm.u); + break; + default: + assert(!"not reached"); + break; + } + break; + case BAD_FILE: + /* Probably unused. */ + src[i] = brw_null_reg(); + } + } + dst = brw_vec8_reg(inst->dst.file, inst->dst.hw_reg, 0); + + switch (inst->opcode) { + case BRW_OPCODE_MOV: + brw_MOV(p, dst, src[0]); + break; + case FS_OPCODE_FB_WRITE: + generate_fb_write(inst); + break; + default: + assert(!"not reached"); + } + } +} + +GLboolean +brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c) +{ + struct brw_compile *p = &c->func; + struct intel_context *intel = &brw->intel; + GLcontext *ctx = &intel->ctx; + struct brw_shader *shader = NULL; + struct gl_shader_program *prog = ctx->Shader.CurrentProgram; + + if (!prog) + return GL_FALSE; + + if (!using_new_fs) + return GL_FALSE; + + for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) { + if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) { + shader = (struct brw_shader *)prog->_LinkedShaders[i]; + break; + } + } + if (!shader) + return GL_FALSE; + + /* We always use 8-wide mode, at least for now. For one, flow + * control only works in 8-wide. Also, when we're fragment shader + * bound, we're almost always under register pressure as well, so + * 8-wide would save us from the performance cliff of spilling + * regs. + */ + c->dispatch_width = 8; + + if (INTEL_DEBUG & DEBUG_WM) { + printf("GLSL IR for native fragment shader %d:\n", prog->Name); + _mesa_print_ir(shader->ir, NULL); + printf("\n"); + } + + /* Now the main event: Visit the shader IR and generate our FS IR for it. + */ + fs_visitor v(c, shader); + visit_list_elements(&v, shader->ir); + + v.emit_dummy_fs(); + + v.generate_code(); + + if (INTEL_DEBUG & DEBUG_WM) { + printf("Native code for fragment shader %d:\n", prog->Name); + for (unsigned int i = 0; i < p->nr_insn; i++) + brw_disasm(stdout, &p->store[i], intel->gen); + printf("\n"); + } + + c->prog_data.nr_params = 0; /* FINISHME */ + c->prog_data.first_curbe_grf = c->key.nr_payload_regs; + c->prog_data.urb_read_length = 1; /* FINISHME: attrs */ + c->prog_data.curb_read_length = 0; /* FINISHME */ + c->prog_data.total_grf = v.grf_used; + c->prog_data.total_scratch = 0; + + return GL_TRUE; +} diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 34cefeea32a..899e9b1dfb5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -177,17 +177,19 @@ static void do_wm_prog( struct brw_context *brw, /* temporary sanity check assertion */ ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program)); - /* - * Shader which use GLSL features such as flow control are handled - * differently from "simple" shaders. - */ - if (fp->isGLSL) { - c->dispatch_width = 8; - brw_wm_glsl_emit(brw, c); - } - else { - c->dispatch_width = 16; - brw_wm_non_glsl_emit(brw, c); + if (!brw_wm_fs_emit(brw, c)) { + /* + * Shader which use GLSL features such as flow control are handled + * differently from "simple" shaders. + */ + if (fp->isGLSL) { + c->dispatch_width = 8; + brw_wm_glsl_emit(brw, c); + } + else { + c->dispatch_width = 16; + brw_wm_non_glsl_emit(brw, c); + } } if (INTEL_DEBUG & DEBUG_WM) diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 6a761e723b4..2639d4f26b3 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -306,6 +306,7 @@ void brw_wm_lookup_iz( GLuint line_aa, GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp); void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c); +GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c); /* brw_wm_emit.c */ void emit_alu1(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index c1cf4db1cae..6699d0a73e6 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -104,8 +104,22 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; key->is_glsl = bfp->isGLSL; - /* temporary sanity check assertion */ - ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); + /* If using the fragment shader backend, the program is always + * 8-wide. + */ + if (ctx->Shader.CurrentProgram) { + int i; + + for (i = 0; i < ctx->Shader.CurrentProgram->_NumLinkedShaders; i++) { + struct brw_shader *shader = + (struct brw_shader *)ctx->Shader.CurrentProgram->_LinkedShaders[i];; + + if (shader->base.Type == GL_FRAGMENT_SHADER && + shader->ir != NULL) { + key->is_glsl = GL_TRUE; + } + } + } /* _NEW_DEPTH */ key->stats_wm = intel->stats_wm; |