diff options
author | Kenneth Graunke <[email protected]> | 2012-11-26 21:46:27 -0800 |
---|---|---|
committer | Kenneth Graunke <[email protected]> | 2012-11-28 18:15:26 -0800 |
commit | dd50c88386c8220f4631115b68a10930378ead6c (patch) | |
tree | 54f1d672cd67b28e385d8fa353df9e5188ea84a3 /src/mesa/drivers/dri/i965/brw_vec4.cpp | |
parent | 9947470655bbf8f4a9c98fe6d93ff5c3486f1124 (diff) |
i965/vs: Move some functions from brw_vec4_emit.cpp to brw_vec4.cpp.
This leaves only the final code generation stage in brw_vec4_emit.cpp,
moving the payload setup, run(), and brw_vs_emit functions to brw_vec4.cpp.
The fragment shader backend puts these functions in brw_fs.cpp, so this
patch also helps with consistency.
Reviewed-by: Eric Anholt <[email protected]>
Reviewed-by: Anuj Phogat <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_vec4.cpp')
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 10a8310ff88..227accae45f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -22,13 +22,18 @@
  */
 
 #include "brw_vec4.h"
+#include "glsl/ir_print_visitor.h"
+
 extern "C" {
 #include "main/macros.h"
+#include "program/prog_print.h"
 #include "program/prog_parameter.h"
 }
 
 #define MAX_INSTRUCTION (1 << 30)
 
+using namespace brw;
+
 namespace brw {
 
 /**
@@ -913,4 +918,304 @@ vec4_visitor::dump_instructions()
    }
 }
 
+/**
+ * Map each enabled vertex attribute to a payload GRF and rewrite all ATTR
+ * register references in the instruction list to fixed hardware registers.
+ *
+ * Also computes urb_read_length and urb_entry_size for the program data.
+ *
+ * \param payload_reg  first GRF holding attribute data
+ * \return the first GRF past the attribute payload
+ */
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+   int nr_attributes;
+   int attribute_map[VERT_ATTRIB_MAX + 1];
+
+   nr_attributes = 0;
+   for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+      if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+         attribute_map[i] = payload_reg + nr_attributes;
+         nr_attributes++;
+      }
+   }
+
+   /* VertexID is stored by the VF as the last vertex element, but we
+    * don't represent it with a flag in inputs_read, so we call it
+    * VERT_ATTRIB_MAX.
+    */
+   if (prog_data->uses_vertexid) {
+      attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
+      nr_attributes++;
+   }
+
+   foreach_list(node, &this->instructions) {
+      vec4_instruction *inst = (vec4_instruction *)node;
+
+      /* We have to support ATTR as a destination for GL_FIXED fixup. */
+      if (inst->dst.file == ATTR) {
+         int grf = attribute_map[inst->dst.reg + inst->dst.reg_offset];
+
+         struct brw_reg reg = brw_vec8_grf(grf, 0);
+         reg.type = inst->dst.type;
+         reg.dw1.bits.writemask = inst->dst.writemask;
+
+         inst->dst.file = HW_REG;
+         inst->dst.fixed_hw_reg = reg;
+      }
+
+      for (int i = 0; i < 3; i++) {
+         if (inst->src[i].file != ATTR)
+            continue;
+
+         int grf = attribute_map[inst->src[i].reg + inst->src[i].reg_offset];
+
+         struct brw_reg reg = brw_vec8_grf(grf, 0);
+         reg.dw1.bits.swizzle = inst->src[i].swizzle;
+         reg.type = inst->src[i].type;
+         if (inst->src[i].abs)
+            reg = brw_abs(reg);
+         if (inst->src[i].negate)
+            reg = negate(reg);
+
+         inst->src[i].file = HW_REG;
+         inst->src[i].fixed_hw_reg = reg;
+      }
+   }
+
+   /* The BSpec says we always have to read at least one thing from
+    * the VF, and it appears that the hardware wedges otherwise.
+    */
+   if (nr_attributes == 0)
+      nr_attributes = 1;
+
+   prog_data->urb_read_length = (nr_attributes + 1) / 2;
+
+   unsigned vue_entries = MAX2(nr_attributes, c->prog_data.vue_map.num_slots);
+
+   if (intel->gen == 6)
+      c->prog_data.urb_entry_size = ALIGN(vue_entries, 8) / 8;
+   else
+      c->prog_data.urb_entry_size = ALIGN(vue_entries, 4) / 4;
+
+   return payload_reg + nr_attributes;
+}
+
+/**
+ * Account for the uniform registers and record nr_params/curb_read_length.
+ *
+ * \param reg  GRF at which the uniform data starts
+ * \return the first GRF past the uniform data
+ */
+int
+vec4_visitor::setup_uniforms(int reg)
+{
+   /* The pre-gen6 VS requires that some push constants get loaded no
+    * matter what, or the GPU would hang.
+    */
+   if (intel->gen < 6 && this->uniforms == 0) {
+      this->uniform_vector_size[this->uniforms] = 1;
+
+      for (unsigned int i = 0; i < 4; i++) {
+         unsigned int slot = this->uniforms * 4 + i;
+         static float zero = 0.0;
+         c->prog_data.param[slot] = &zero;
+      }
+
+      this->uniforms++;
+      reg++;
+   } else {
+      reg += ALIGN(uniforms, 2) / 2;
+   }
+
+   c->prog_data.nr_params = this->uniforms * 4;
+
+   c->prog_data.curb_read_length = reg - 1;
+
+   return reg;
+}
+
+/**
+ * Lay out the payload (g0, uniforms, attributes); set first_non_payload_grf.
+ */
+void
+vec4_visitor::setup_payload(void)
+{
+   int reg = 0;
+
+   /* The payload always contains important data in g0, which contains
+    * the URB handles that are passed on to the URB write at the end
+    * of the thread.  So, we always start push constants at g1.
+    */
+   reg++;
+
+   reg = setup_uniforms(reg);
+
+   reg = setup_attributes(reg);
+
+   this->first_non_payload_grf = reg;
+}
+
+/**
+ * Emit the vec4 IR, run the lowering and optimization passes, set up the
+ * payload, allocate registers and generate code.
+ *
+ * \return false if the visitor was marked as failed, true otherwise
+ */
+bool
+vec4_visitor::run()
+{
+   emit_attribute_fixups();
+
+   /* Generate VS IR for main().  (the visitor only descends into
+    * functions called "main").
+    */
+   if (shader) {
+      visit_instructions(shader->ir);
+   } else {
+      emit_vertex_program_code();
+   }
+
+   if (c->key.userclip_active && !c->key.uses_clip_distance)
+      setup_uniform_clipplane_values();
+
+   emit_urb_writes();
+
+   /* Before any optimization, push array accesses out to scratch
+    * space where we need them to be.  This pass may allocate new
+    * virtual GRFs, so we want to do it early.  It also makes sure
+    * that we have reladdr computations available for CSE, since we'll
+    * often do repeated subexpressions for those.
+    */
+   if (shader) {
+      move_grf_array_access_to_scratch();
+      move_uniform_array_access_to_pull_constants();
+   } else {
+      /* The ARB_vertex_program frontend emits pull constant loads directly
+       * rather than using reladdr, so we don't need to walk through all the
+       * instructions looking for things to move.  There isn't anything.
+       *
+       * We do still need to split things to vec4 size.
+       */
+      split_uniform_registers();
+   }
+   pack_uniform_registers();
+   move_push_constants_to_pull_constants();
+   split_virtual_grfs();
+
+   bool progress;
+   do {
+      progress = false;
+      progress = dead_code_eliminate() || progress;
+      progress = opt_copy_propagation() || progress;
+      progress = opt_algebraic() || progress;
+      progress = opt_compute_to_mrf() || progress;
+   } while (progress);
+
+
+   if (failed)
+      return false;
+
+   setup_payload();
+
+   if (false) {
+      /* Debug of register spilling: Go spill everything. */
+      const int grf_count = virtual_grf_count;
+      float spill_costs[virtual_grf_count];
+      bool no_spill[virtual_grf_count];
+      evaluate_spill_costs(spill_costs, no_spill);
+      for (int i = 0; i < grf_count; i++) {
+         if (no_spill[i])
+            continue;
+         spill_reg(i);
+      }
+   }
+
+   while (!reg_allocate()) {
+      if (failed)
+         break;
+   }
+
+   if (failed)
+      return false;
+
+   brw_set_access_mode(p, BRW_ALIGN_16);
+
+   generate_code();
+
+   return !failed;
+}
+
 } /* namespace brw */
+
+extern "C" {
+
+/**
+ * Compile a GLSL vertex shader or an ARB_vertex_program with vec4_visitor.
+ *
+ * \param prog  GLSL shader program; may be NULL, in which case c->vp is
+ *              treated as an ARB_vertex_program
+ * \param c     vertex shader compile state
+ * \return true on success, false if vec4_visitor::run() failed
+ */
+bool
+brw_vs_emit(struct gl_shader_program *prog, struct brw_vs_compile *c)
+{
+   struct brw_context *brw = c->func.brw;
+   struct intel_context *intel = &c->func.brw->intel;
+   bool start_busy = false;
+   float start_time = 0;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+      start_busy = (intel->batch.last_bo &&
+                    drm_intel_bo_busy(intel->batch.last_bo));
+      start_time = get_time();
+   }
+
+   struct brw_shader *shader = NULL;
+   if (prog)
+      shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+
+   if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+      if (shader) {
+         printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+         _mesa_print_ir(shader->ir, NULL);
+         printf("\n\n");
+      } else {
+         printf("ARB_vertex_program %d for native vertex shader\n",
+                c->vp->program.Base.Id);
+         _mesa_print_program(&c->vp->program.Base);
+      }
+   }
+
+   vec4_visitor v(c, prog, shader);
+   if (!v.run()) {
+      /* prog may be NULL (it is checked above), so only touch the link
+       * status and info log when a GLSL program was actually supplied.
+       */
+      if (prog) {
+         prog->LinkStatus = false;
+         ralloc_strcat(&prog->InfoLog, v.fail_msg);
+      }
+      return false;
+   }
+
+   /* Report after v.run() so that the interval measured from start_time
+    * actually covers the compile.
+    */
+   if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) {
+      if (shader->compiled_once) {
+         brw_vs_debug_recompile(brw, prog, &c->key);
+      }
+      if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) {
+         perf_debug("VS compile took %.03f ms and stalled the GPU\n",
+                    (get_time() - start_time) * 1000);
+      }
+      shader->compiled_once = true;
+   }
+
+   return true;
+}
+
+} /* extern "C" */