From a8b86459a1bb74cfdf0d63572a9fe194b2b5b53f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 22 Jul 2011 16:45:15 -0700 Subject: i965/fs: Optimize a * 1.0 -> a. This appears in our instruction stream as a result of the brw_vs_constval.c handling. --- src/mesa/drivers/dri/i965/brw_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 2bf850e5dea..89d6cda7e4f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -485,6 +485,7 @@ public: void setup_pull_constants(); void calculate_live_intervals(); bool propagate_constants(); + bool opt_algebraic(); bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); -- cgit v1.2.3 From ee0373b833155804bb8846c6f05f897b9ee5afa6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 25 Jul 2011 18:13:04 -0700 Subject: i965/fs: Don't upload unused uniform components. This saves both register space and upload bandwidth for unused values. Note that previously we were relying on the visitor not initially generating references to different sets of uniforms between the 8-wide and 16-wide code generation, and now we're relying on them dead-code eliminating the same stuff, too. 
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 89 ++++++++++++++++++++++++++++++++++-- src/mesa/drivers/dri/i965/brw_fs.h | 10 +++- 2 files changed, 95 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 02041b3bc03..f55be022f72 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -242,11 +242,12 @@ import_uniforms_callback(const void *key, * This brings in those uniform definitions */ void -fs_visitor::import_uniforms(struct hash_table *src_variable_ht) +fs_visitor::import_uniforms(fs_visitor *v) { - hash_table_call_foreach(src_variable_ht, + hash_table_call_foreach(v->variable_ht, import_uniforms_callback, variable_ht); + this->params_remap = v->params_remap; } /* Our support for uniforms is piggy-backed on the struct @@ -798,6 +799,86 @@ fs_visitor::split_virtual_grfs() this->live_intervals_valid = false; } +bool +fs_visitor::remove_dead_constants() +{ + if (c->dispatch_width == 8) { + this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params); + + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) + this->params_remap[i] = -1; + + /* Find which params are still in use. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(constant_nr < (int)c->prog_data.nr_params); + + /* For now, set this to non-negative. We'll give it the + * actual new number in a moment, in order to keep the + * register numbers nicely ordered. + */ + this->params_remap[constant_nr] = 0; + } + } + + /* Figure out what the new numbers for the params will be. At some + * point when we're doing uniform array access, we're going to want + * to keep the distinction between .reg and .reg_offset, but for + * now we don't care. 
+ */ + unsigned int new_nr_params = 0; + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + if (this->params_remap[i] != -1) { + this->params_remap[i] = new_nr_params++; + } + } + + /* Update the list of params to be uploaded to match our new numbering. */ + for (unsigned int i = 0; i < c->prog_data.nr_params; i++) { + int remapped = this->params_remap[i]; + + if (remapped == -1) + continue; + + /* We've already done setup_paramvalues_refs() so no need to worry + * about param_index and param_offset. + */ + c->prog_data.param[remapped] = c->prog_data.param[i]; + c->prog_data.param_convert[remapped] = c->prog_data.param_convert[i]; + } + + c->prog_data.nr_params = new_nr_params; + } else { + /* This should have been generated in the 8-wide pass already. */ + assert(this->params_remap); + } + + /* Now do the renumbering of the shader to remove unused params. */ + foreach_list(node, &this->instructions) { + fs_inst *inst = (fs_inst *)node; + + for (int i = 0; i < 3; i++) { + int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + + if (inst->src[i].file != UNIFORM) + continue; + + assert(this->params_remap[constant_nr] != -1); + inst->src[i].hw_reg = this->params_remap[constant_nr]; + inst->src[i].reg_offset = 0; + } + } + + return true; +} + /** * Choose accesses from the UNIFORM file to demote to using the pull * constant buffer. 
@@ -1624,6 +1705,8 @@ fs_visitor::run() progress = dead_code_eliminate() || progress; } while (progress); + remove_dead_constants(); + schedule_instructions(); assign_curb_setup(); @@ -1702,7 +1785,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, if (intel->gen >= 5 && c->prog_data.nr_pull_params == 0) { c->dispatch_width = 16; fs_visitor v2(c, prog, shader); - v2.import_uniforms(v.variable_ht); + v2.import_uniforms(&v); v2.run(); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 89d6cda7e4f..96e1420038f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -421,7 +421,7 @@ public: fs_reg *variable_storage(ir_variable *var); int virtual_grf_alloc(int size); - void import_uniforms(struct hash_table *src_variable_ht); + void import_uniforms(fs_visitor *v); void visit(ir_variable *ir); void visit(ir_assignment *ir); @@ -489,6 +489,7 @@ public: bool register_coalesce(); bool compute_to_mrf(); bool dead_code_eliminate(); + bool remove_dead_constants(); bool remove_duplicate_mrf_writes(); bool virtual_grf_interferes(int a, int b); void schedule_instructions(); @@ -566,6 +567,13 @@ public: int *virtual_grf_use; bool live_intervals_valid; + /* This is the map from UNIFORM hw_reg + reg_offset as generated by + * the visitor to the packed uniform number after + * remove_dead_constants() that represents the actual uploaded + * uniform index. + */ + int *params_remap; + struct hash_table *variable_ht; ir_variable *frag_color, *frag_data, *frag_depth; int first_non_payload_grf; -- cgit v1.2.3 From b76378d46a211521582cfab56dc05031a57502a6 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 4 May 2011 13:50:13 -0700 Subject: i965/fs: Eliminate the magic nature of virtual GRF 0. This was a debugging aid at one point -- virtual grf 0 should never be allocated, and it would be used if undefined register access occurred in codegen. 
However, it made the confusing register allocation code even more confusing by indexing things off of 1 all over. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +++---- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 33 +++++++++-------------- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 +-- 4 files changed, 17 insertions(+), 31 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f55be022f72..d57a67cc4fc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -181,9 +181,6 @@ fs_visitor::virtual_grf_alloc(int size) virtual_grf_array_size *= 2; virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int, virtual_grf_array_size); - - /* This slot is always unused. */ - virtual_grf_sizes[0] = 0; } virtual_grf_sizes[virtual_grf_next] = size; return virtual_grf_next++; @@ -985,7 +982,7 @@ fs_visitor::calculate_live_intervals() } } else { for (unsigned int i = 0; i < 3; i++) { - if (inst->src[i].file == GRF && inst->src[i].reg != 0) { + if (inst->src[i].file == GRF) { int reg = inst->src[i].reg; if (!loop_depth) { @@ -1001,7 +998,7 @@ fs_visitor::calculate_live_intervals() } } } - if (inst->dst.file == GRF && inst->dst.reg != 0) { + if (inst->dst.file == GRF) { int reg = inst->dst.reg; if (!loop_depth) { @@ -1715,7 +1712,7 @@ fs_visitor::run() if (0) { /* Debug of register spilling: Go spill everything. 
*/ int virtual_grf_count = virtual_grf_next; - for (int i = 1; i < virtual_grf_count; i++) { + for (int i = 0; i < virtual_grf_count; i++) { spill_reg(i); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 96e1420038f..0375f672bec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -402,7 +402,7 @@ public: this->base_ir = NULL; this->virtual_grf_sizes = NULL; - this->virtual_grf_next = 1; + this->virtual_grf_next = 0; this->virtual_grf_array_size = 0; this->virtual_grf_def = NULL; this->virtual_grf_use = NULL; diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f246ac49660..83dd629aafb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -50,7 +50,7 @@ extern "C" { static void assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { - if (reg->file == GRF && reg->reg != 0) { + if (reg->file == GRF) { assert(reg->reg_offset >= 0); reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; reg->reg = 0; @@ -60,20 +60,17 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) void fs_visitor::assign_regs_trivial() { - int last_grf = 0; - int hw_reg_mapping[this->virtual_grf_next]; + int hw_reg_mapping[this->virtual_grf_next + 1]; int i; int reg_width = c->dispatch_width / 8; - hw_reg_mapping[0] = 0; /* Note that compressed instructions require alignment to 2 registers. 
*/ - hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width); - for (i = 2; i < this->virtual_grf_next; i++) { + hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); + for (i = 1; i <= this->virtual_grf_next; i++) { hw_reg_mapping[i] = (hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1] * reg_width); } - last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] * - reg_width); + this->grf_used = hw_reg_mapping[this->virtual_grf_next]; foreach_list(node, &this->instructions) { fs_inst *inst = (fs_inst *)node; @@ -83,12 +80,11 @@ fs_visitor::assign_regs_trivial() assign_reg(hw_reg_mapping, &inst->src[1], reg_width); } - if (last_grf >= BRW_MAX_GRF) { + if (this->grf_used >= BRW_MAX_GRF) { fail("Ran out of regs on trivial allocator (%d/%d)\n", - last_grf, BRW_MAX_GRF); + this->grf_used, BRW_MAX_GRF); } - this->grf_used = last_grf + reg_width; } bool @@ -101,7 +97,7 @@ fs_visitor::assign_regs() * for reg_width == 2. */ int reg_width = c->dispatch_width / 8; - int hw_reg_mapping[this->virtual_grf_next + 1]; + int hw_reg_mapping[this->virtual_grf_next]; int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width); int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width; int class_sizes[base_reg_count]; @@ -125,7 +121,7 @@ fs_visitor::assign_regs() */ class_sizes[class_count++] = 2; } - for (int r = 1; r < this->virtual_grf_next; r++) { + for (int r = 0; r < this->virtual_grf_next; r++) { int i; for (i = 0; i < class_count; i++) { @@ -195,12 +191,8 @@ fs_visitor::assign_regs() struct ra_graph *g = ra_alloc_interference_graph(regs, this->virtual_grf_next); - /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1 - * with nodes. 
- */ - ra_set_node_class(g, 0, classes[0]); - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { for (int c = 0; c < class_count; c++) { if (class_sizes[c] == this->virtual_grf_sizes[i]) { if (aligned_pair_class >= 0 && @@ -213,7 +205,7 @@ fs_visitor::assign_regs() } } - for (int j = 1; j < i; j++) { + for (int j = 0; j < i; j++) { if (virtual_grf_interferes(i, j)) { ra_add_node_interference(g, i, j); } @@ -248,8 +240,7 @@ fs_visitor::assign_regs() * numbers. */ this->grf_used = first_assigned_grf; - hw_reg_mapping[0] = 0; /* unused */ - for (int i = 1; i < this->virtual_grf_next; i++) { + for (int i = 0; i < this->virtual_grf_next; i++) { int reg = ra_get_node_reg(g, i); int hw_reg = -1; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2b769ccbba1..2e3f9be75b4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -142,9 +142,7 @@ fs_visitor::visit(ir_dereference_array *ir) this->result.type = brw_type_for_base_type(ir->type); if (index) { - assert(this->result.file == UNIFORM || - (this->result.file == GRF && - this->result.reg != 0)); + assert(this->result.file == UNIFORM || this->result.file == GRF); this->result.reg_offset += index->value.i[0] * element_size; } else { assert(!"FINISHME: non-constant array element"); -- cgit v1.2.3 From c9e81fe14f36933617c862efb15ae09194485eab Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sun, 15 May 2011 09:36:19 -0700 Subject: i965: Drop the reg/hw_reg distinction. "reg" was set in only one case, virtual GRFs pre register allocation, and would be unset and have hw_reg set after allocation. Since we never bothered with looking at virtual GRF number after allocation anyway, just use the same storage and avoid confusion. 
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++++----------- src/mesa/drivers/dri/i965/brw_fs.h | 21 +++++++------ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 ++-- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +-- .../dri/i965/brw_fs_schedule_instructions.cpp | 8 ++--- 5 files changed, 37 insertions(+), 36 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d57a67cc4fc..cafb7092ac8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -187,20 +187,20 @@ fs_visitor::virtual_grf_alloc(int size) } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg) +fs_reg::fs_reg(enum register_file file, int reg) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = BRW_REGISTER_TYPE_F; } /** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int hw_reg, uint32_t type) +fs_reg::fs_reg(enum register_file file, int reg, uint32_t type) { init(); this->file = file; - this->hw_reg = hw_reg; + this->reg = reg; this->type = type; } @@ -636,7 +636,7 @@ fs_visitor::assign_curb_setup() for (unsigned int i = 0; i < 3; i++) { if (inst->src[i].file == UNIFORM) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; struct brw_reg brw_reg = brw_vec1_grf(c->nr_payload_regs + constant_nr / 8, constant_nr % 8); @@ -810,7 +810,7 @@ fs_visitor::remove_dead_constants() fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { - int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; if (inst->src[i].file != UNIFORM) continue; @@ -862,13 +862,13 @@ fs_visitor::remove_dead_constants() fs_inst *inst = (fs_inst *)node; for (int i = 0; i < 3; i++) { - int constant_nr = inst->src[i].hw_reg + 
inst->src[i].reg_offset; + int constant_nr = inst->src[i].reg + inst->src[i].reg_offset; if (inst->src[i].file != UNIFORM) continue; assert(this->params_remap[constant_nr] != -1); - inst->src[i].hw_reg = this->params_remap[constant_nr]; + inst->src[i].reg = this->params_remap[constant_nr]; inst->src[i].reg_offset = 0; } } @@ -912,7 +912,7 @@ fs_visitor::setup_pull_constants() if (inst->src[i].file != UNIFORM) continue; - int uniform_nr = inst->src[i].hw_reg + inst->src[i].reg_offset; + int uniform_nr = inst->src[i].reg + inst->src[i].reg_offset; if (uniform_nr < pull_uniform_base) continue; @@ -1374,9 +1374,9 @@ fs_visitor::compute_to_mrf() /* Work out which hardware MRF registers are written by this * instruction. */ - int mrf_low = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int mrf_low = inst->dst.reg & ~BRW_MRF_COMPR4; int mrf_high; - if (inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (inst->dst.reg & BRW_MRF_COMPR4) { mrf_high = mrf_low + 4; } else if (c->dispatch_width == 16 && (!inst->force_uncompressed && !inst->force_sechalf)) { @@ -1443,7 +1443,7 @@ fs_visitor::compute_to_mrf() if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) { /* Found the creator of our MRF's source value. */ scan_inst->dst.file = MRF; - scan_inst->dst.hw_reg = inst->dst.hw_reg; + scan_inst->dst.reg = inst->dst.reg; scan_inst->saturate |= inst->saturate; inst->remove(); progress = true; @@ -1480,10 +1480,10 @@ fs_visitor::compute_to_mrf() /* If somebody else writes our MRF here, we can't * compute-to-MRF before that. 
*/ - int scan_mrf_low = scan_inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int scan_mrf_low = scan_inst->dst.reg & ~BRW_MRF_COMPR4; int scan_mrf_high; - if (scan_inst->dst.hw_reg & BRW_MRF_COMPR4) { + if (scan_inst->dst.reg & BRW_MRF_COMPR4) { scan_mrf_high = scan_mrf_low + 4; } else if (c->dispatch_width == 16 && (!scan_inst->force_uncompressed && @@ -1555,7 +1555,7 @@ fs_visitor::remove_duplicate_mrf_writes() if (inst->opcode == BRW_OPCODE_MOV && inst->dst.file == MRF) { - fs_inst *prev_inst = last_mrf_move[inst->dst.hw_reg]; + fs_inst *prev_inst = last_mrf_move[inst->dst.reg]; if (prev_inst && inst->equals(prev_inst)) { inst->remove(); progress = true; @@ -1565,7 +1565,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Clear out the last-write records for MRFs that were overwritten. */ if (inst->dst.file == MRF) { - last_mrf_move[inst->dst.hw_reg] = NULL; + last_mrf_move[inst->dst.reg] = NULL; } if (inst->mlen > 0) { @@ -1591,7 +1591,7 @@ fs_visitor::remove_duplicate_mrf_writes() inst->dst.file == MRF && inst->src[0].file == GRF && !inst->predicated) { - last_mrf_move[inst->dst.hw_reg] = inst; + last_mrf_move[inst->dst.reg] = inst; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0375f672bec..4ec649014de 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -51,7 +51,7 @@ enum register_file { MRF = BRW_MESSAGE_REGISTER_FILE, IMM = BRW_IMMEDIATE_VALUE, FIXED_HW_REG, /* a struct brw_reg */ - UNIFORM, /* prog_data->params[hw_reg] */ + UNIFORM, /* prog_data->params[reg] */ BAD_FILE }; @@ -99,7 +99,6 @@ public: void init() { memset(this, 0, sizeof(*this)); - this->hw_reg = -1; this->smear = -1; } @@ -146,8 +145,8 @@ public: this->type = fixed_hw_reg.type; } - fs_reg(enum register_file file, int hw_reg); - fs_reg(enum register_file file, int hw_reg, uint32_t type); + fs_reg(enum register_file file, int reg); + fs_reg(enum register_file file, int reg, uint32_t type); fs_reg(class fs_visitor *v, 
const struct glsl_type *type); bool equals(fs_reg *r) @@ -155,7 +154,6 @@ public: return (file == r->file && reg == r->reg && reg_offset == r->reg_offset && - hw_reg == r->hw_reg && type == r->type && negate == r->negate && abs == r->abs && @@ -167,12 +165,17 @@ public: /** Register file: ARF, GRF, MRF, IMM. */ enum register_file file; - /** virtual register number. 0 = fixed hw reg */ + /** + * Register number. For ARF/MRF, it's the hardware register. For + * GRF, it's a virtual register number until register allocation + */ int reg; - /** Offset within the virtual register. */ + /** + * For virtual registers, this is a hardware register offset from + * the start of the register block (for example, a constant index + * in an array access). + */ int reg_offset; - /** HW register number. Generally unset until register allocation. */ - int hw_reg; /** Register type. BRW_REGISTER_TYPE_* */ int type; bool negate; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 9fb0153d1f8..e168e541bef 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -538,11 +538,9 @@ brw_reg_from_fs_reg(fs_reg *reg) case ARF: case MRF: if (reg->smear == -1) { - brw_reg = brw_vec8_reg(reg->file, - reg->hw_reg, 0); + brw_reg = brw_vec8_reg(reg->file, reg->reg, 0); } else { - brw_reg = brw_vec1_reg(reg->file, - reg->hw_reg, reg->smear); + brw_reg = brw_vec1_reg(reg->file, reg->reg, reg->smear); } brw_reg = retype(brw_reg, reg->type); if (reg->sechalf) diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 8e44a010576..5c9cba99ae5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -52,8 +52,8 @@ assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width) { if (reg->file == GRF) { assert(reg->reg_offset >= 0); - reg->hw_reg = reg_hw_locations[reg->reg] + 
reg->reg_offset * reg_width; - reg->reg = 0; + reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width; + reg->reg_offset = 0; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 9ec3f502764..f1a88fcfa79 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -321,12 +321,12 @@ instruction_scheduler::calculate_deps() add_dep(last_grf_write[inst->dst.reg], n); last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; add_dep(last_mrf_write[reg], n); last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; @@ -401,12 +401,12 @@ instruction_scheduler::calculate_deps() if (inst->dst.file == GRF) { last_grf_write[inst->dst.reg] = n; } else if (inst->dst.file == MRF) { - int reg = inst->dst.hw_reg & ~BRW_MRF_COMPR4; + int reg = inst->dst.reg & ~BRW_MRF_COMPR4; last_mrf_write[reg] = n; if (is_compressed(inst)) { - if (inst->dst.hw_reg & BRW_MRF_COMPR4) + if (inst->dst.reg & BRW_MRF_COMPR4) reg += 4; else reg++; -- cgit v1.2.3 From 6034b9a5124475d300d0678bd2fb6160865fa972 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 3 May 2011 10:55:50 -0700 Subject: i965: Create a shared enum for hardware and compiler-internal opcodes. This should make gdbing more pleasant, and it might be used in sharing part of the codegen between the VS and FS backends. 
--- src/mesa/drivers/dri/i965/brw_defines.h | 134 +++++++++++++-------- src/mesa/drivers/dri/i965/brw_fs.cpp | 11 +- src/mesa/drivers/dri/i965/brw_fs.h | 56 +++------ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 6 + src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 20 +-- .../dri/i965/brw_fs_schedule_instructions.cpp | 15 --- src/mesa/drivers/dri/i965/brw_shader.h | 4 + 7 files changed, 120 insertions(+), 126 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 0a3027d04ad..fe5d29c4328 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -557,58 +557,88 @@ #define BRW_WE_ALL 1 /** @} */ -#define BRW_OPCODE_MOV 1 -#define BRW_OPCODE_SEL 2 -#define BRW_OPCODE_NOT 4 -#define BRW_OPCODE_AND 5 -#define BRW_OPCODE_OR 6 -#define BRW_OPCODE_XOR 7 -#define BRW_OPCODE_SHR 8 -#define BRW_OPCODE_SHL 9 -#define BRW_OPCODE_RSR 10 -#define BRW_OPCODE_RSL 11 -#define BRW_OPCODE_ASR 12 -#define BRW_OPCODE_CMP 16 -#define BRW_OPCODE_CMPN 17 -#define BRW_OPCODE_JMPI 32 -#define BRW_OPCODE_IF 34 -#define BRW_OPCODE_IFF 35 -#define BRW_OPCODE_ELSE 36 -#define BRW_OPCODE_ENDIF 37 -#define BRW_OPCODE_DO 38 -#define BRW_OPCODE_WHILE 39 -#define BRW_OPCODE_BREAK 40 -#define BRW_OPCODE_CONTINUE 41 -#define BRW_OPCODE_HALT 42 -#define BRW_OPCODE_MSAVE 44 -#define BRW_OPCODE_MRESTORE 45 -#define BRW_OPCODE_PUSH 46 -#define BRW_OPCODE_POP 47 -#define BRW_OPCODE_WAIT 48 -#define BRW_OPCODE_SEND 49 -#define BRW_OPCODE_SENDC 50 -#define BRW_OPCODE_MATH 56 -#define BRW_OPCODE_ADD 64 -#define BRW_OPCODE_MUL 65 -#define BRW_OPCODE_AVG 66 -#define BRW_OPCODE_FRC 67 -#define BRW_OPCODE_RNDU 68 -#define BRW_OPCODE_RNDD 69 -#define BRW_OPCODE_RNDE 70 -#define BRW_OPCODE_RNDZ 71 -#define BRW_OPCODE_MAC 72 -#define BRW_OPCODE_MACH 73 -#define BRW_OPCODE_LZD 74 -#define BRW_OPCODE_SAD2 80 -#define BRW_OPCODE_SADA2 81 -#define BRW_OPCODE_DP4 84 
-#define BRW_OPCODE_DPH 85 -#define BRW_OPCODE_DP3 86 -#define BRW_OPCODE_DP2 87 -#define BRW_OPCODE_DPA2 88 -#define BRW_OPCODE_LINE 89 -#define BRW_OPCODE_PLN 90 -#define BRW_OPCODE_NOP 126 +enum opcode { + /* These are the actual hardware opcodes. */ + BRW_OPCODE_MOV = 1, + BRW_OPCODE_SEL = 2, + BRW_OPCODE_NOT = 4, + BRW_OPCODE_AND = 5, + BRW_OPCODE_OR = 6, + BRW_OPCODE_XOR = 7, + BRW_OPCODE_SHR = 8, + BRW_OPCODE_SHL = 9, + BRW_OPCODE_RSR = 10, + BRW_OPCODE_RSL = 11, + BRW_OPCODE_ASR = 12, + BRW_OPCODE_CMP = 16, + BRW_OPCODE_CMPN = 17, + BRW_OPCODE_JMPI = 32, + BRW_OPCODE_IF = 34, + BRW_OPCODE_IFF = 35, + BRW_OPCODE_ELSE = 36, + BRW_OPCODE_ENDIF = 37, + BRW_OPCODE_DO = 38, + BRW_OPCODE_WHILE = 39, + BRW_OPCODE_BREAK = 40, + BRW_OPCODE_CONTINUE = 41, + BRW_OPCODE_HALT = 42, + BRW_OPCODE_MSAVE = 44, + BRW_OPCODE_MRESTORE = 45, + BRW_OPCODE_PUSH = 46, + BRW_OPCODE_POP = 47, + BRW_OPCODE_WAIT = 48, + BRW_OPCODE_SEND = 49, + BRW_OPCODE_SENDC = 50, + BRW_OPCODE_MATH = 56, + BRW_OPCODE_ADD = 64, + BRW_OPCODE_MUL = 65, + BRW_OPCODE_AVG = 66, + BRW_OPCODE_FRC = 67, + BRW_OPCODE_RNDU = 68, + BRW_OPCODE_RNDD = 69, + BRW_OPCODE_RNDE = 70, + BRW_OPCODE_RNDZ = 71, + BRW_OPCODE_MAC = 72, + BRW_OPCODE_MACH = 73, + BRW_OPCODE_LZD = 74, + BRW_OPCODE_SAD2 = 80, + BRW_OPCODE_SADA2 = 81, + BRW_OPCODE_DP4 = 84, + BRW_OPCODE_DPH = 85, + BRW_OPCODE_DP3 = 86, + BRW_OPCODE_DP2 = 87, + BRW_OPCODE_DPA2 = 88, + BRW_OPCODE_LINE = 89, + BRW_OPCODE_PLN = 90, + BRW_OPCODE_NOP = 126, + + /* These are compiler backend opcodes that get translated into other + * instructions. 
+ */ + FS_OPCODE_FB_WRITE = 128, + FS_OPCODE_RCP, + FS_OPCODE_RSQ, + FS_OPCODE_SQRT, + FS_OPCODE_EXP2, + FS_OPCODE_LOG2, + FS_OPCODE_POW, + FS_OPCODE_SIN, + FS_OPCODE_COS, + FS_OPCODE_DDX, + FS_OPCODE_DDY, + FS_OPCODE_PIXEL_X, + FS_OPCODE_PIXEL_Y, + FS_OPCODE_CINTERP, + FS_OPCODE_LINTERP, + FS_OPCODE_TEX, + FS_OPCODE_TXB, + FS_OPCODE_TXD, + FS_OPCODE_TXL, + FS_OPCODE_DISCARD, + FS_OPCODE_SPILL, + FS_OPCODE_UNSPILL, + FS_OPCODE_PULL_CONSTANT_LOAD, +}; #define BRW_PREDICATE_NONE 0 #define BRW_PREDICATE_NORMAL 1 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cafb7092ac8..a0d75cc6f96 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -522,7 +522,7 @@ fs_visitor::emit_frontfacing_interpolation(ir_variable *ir) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { case FS_OPCODE_RCP: @@ -565,7 +565,7 @@ fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src) } fs_inst * -fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1) +fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { int base_mrf = 2; fs_inst *inst; @@ -1149,6 +1149,9 @@ fs_visitor::propagate_constants() progress = true; } break; + + default: + break; } } @@ -1200,6 +1203,8 @@ fs_visitor::opt_algebraic() break; } + break; + default: break; } } @@ -1267,6 +1272,8 @@ fs_visitor::register_coalesce() case BRW_OPCODE_ENDIF: if_depth--; break; + default: + break; } if (loop_depth || if_depth) continue; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 4ec649014de..d207ac27aa2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -25,6 +25,8 @@ * */ +#include "brw_shader.h" + extern "C" { #include @@ -55,33 +57,6 @@ enum register_file { BAD_FILE }; -enum fs_opcodes { - 
FS_OPCODE_FB_WRITE = 256, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, - FS_OPCODE_DDX, - FS_OPCODE_DDY, - FS_OPCODE_PIXEL_X, - FS_OPCODE_PIXEL_Y, - FS_OPCODE_CINTERP, - FS_OPCODE_LINTERP, - FS_OPCODE_TEX, - FS_OPCODE_TXB, - FS_OPCODE_TXD, - FS_OPCODE_TXL, - FS_OPCODE_DISCARD, - FS_OPCODE_SPILL, - FS_OPCODE_UNSPILL, - FS_OPCODE_PULL_CONSTANT_LOAD, -}; - - class fs_reg { public: /* Callers of this ralloc-based new need not call delete. It's @@ -227,13 +202,13 @@ public: init(); } - fs_inst(int opcode) + fs_inst(enum opcode opcode) { init(); this->opcode = opcode; } - fs_inst(int opcode, fs_reg dst) + fs_inst(enum opcode opcode, fs_reg dst) { init(); this->opcode = opcode; @@ -243,7 +218,7 @@ public: assert(dst.reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0) { init(); this->opcode = opcode; @@ -256,7 +231,7 @@ public: assert(src[0].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { init(); this->opcode = opcode; @@ -272,7 +247,7 @@ public: assert(src[1].reg_offset >= 0); } - fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) { init(); this->opcode = opcode; @@ -331,7 +306,7 @@ public: opcode == FS_OPCODE_POW); } - int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ + enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ fs_reg dst; fs_reg src[3]; bool saturate; @@ -448,27 +423,28 @@ public: fs_inst *emit(fs_inst inst); - fs_inst *emit(int opcode) + fs_inst *emit(enum opcode opcode) { return emit(fs_inst(opcode)); } - fs_inst *emit(int opcode, fs_reg dst) + fs_inst *emit(enum opcode opcode, fs_reg dst) { return emit(fs_inst(opcode, dst)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0) + fs_inst *emit(enum 
opcode opcode, fs_reg dst, fs_reg src0) { return emit(fs_inst(opcode, dst, src0)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1) + fs_inst *emit(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) { return emit(fs_inst(opcode, dst, src0, src1)); } - fs_inst *emit(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2) + fs_inst *emit(enum opcode opcode, fs_reg dst, + fs_reg src0, fs_reg src1, fs_reg src2) { return emit(fs_inst(opcode, dst, src0, src1, src2)); } @@ -529,8 +505,8 @@ public: int sampler); fs_inst *emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, int sampler); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0); - fs_inst *emit_math(fs_opcodes op, fs_reg dst, fs_reg src0, fs_reg src1); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); + fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); void emit_bool_to_cond_code(ir_rvalue *condition); void emit_if_gen6(ir_if *ir); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index e168e541bef..529df0880f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -277,6 +277,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; break; + default: + assert(!"not reached"); + break; } } else { switch (inst->opcode) { @@ -317,6 +320,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) assert(inst->mlen == 7 || inst->mlen == 10); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; break; + default: + assert(!"not reached"); + break; } } assert(msg_type != -1); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5c9cba99ae5..7c5414ac26c 
100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -25,23 +25,6 @@ * */ -extern "C" { - -#include - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_parameter.h" -#include "program/prog_print.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" @@ -359,6 +342,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) if (inst->dst.file == GRF) no_spill[inst->dst.reg] = true; break; + + default: + break; } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index f1a88fcfa79..965a5b333a2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -25,21 +25,6 @@ * */ -extern "C" { - -#include - -#include "main/macros.h" -#include "main/shaderobj.h" -#include "main/uniforms.h" -#include "program/prog_optimize.h" -#include "program/register_allocate.h" -#include "program/sampler.h" -#include "program/hash_table.h" -#include "brw_context.h" -#include "brw_eu.h" -#include "brw_wm.h" -} #include "brw_fs.h" #include "../glsl/glsl_types.h" #include "../glsl/ir_optimization.h" diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index 4c568a26caa..21671d1c8d6 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -21,5 +21,9 @@ * IN THE SOFTWARE. 
*/ +#include <stdint.h> + +#pragma once + int brw_type_for_base_type(const struct glsl_type *type); uint32_t brw_conditional_for_comparison(unsigned int op); -- cgit v1.2.3 From 65b5cbbcf783f6c668ab5b31a0734680dd396794 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 5 Aug 2011 12:38:58 -0700 Subject: i965: Rename math FS_OPCODE_* to SHADER_OPCODE_*. I want to just use the same enums in the VS. --- src/mesa/drivers/dri/i965/brw_defines.h | 16 +++++----- src/mesa/drivers/dri/i965/brw_fs.cpp | 34 +++++++++++----------- src/mesa/drivers/dri/i965/brw_fs.h | 16 +++++----- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 34 +++++++++++----------- .../dri/i965/brw_fs_schedule_instructions.cpp | 16 +++++----- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 20 ++++++------- 6 files changed, 68 insertions(+), 68 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fe5d29c4328..da8d016da42 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -616,14 +616,14 @@ enum opcode { * instructions.
*/ FS_OPCODE_FB_WRITE = 128, - FS_OPCODE_RCP, - FS_OPCODE_RSQ, - FS_OPCODE_SQRT, - FS_OPCODE_EXP2, - FS_OPCODE_LOG2, - FS_OPCODE_POW, - FS_OPCODE_SIN, - FS_OPCODE_COS, + SHADER_OPCODE_RCP, + SHADER_OPCODE_RSQ, + SHADER_OPCODE_SQRT, + SHADER_OPCODE_EXP2, + SHADER_OPCODE_LOG2, + SHADER_OPCODE_POW, + SHADER_OPCODE_SIN, + SHADER_OPCODE_COS, FS_OPCODE_DDX, FS_OPCODE_DDY, FS_OPCODE_PIXEL_X, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index a0d75cc6f96..693ef0ce31a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -143,15 +143,15 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) return 0; switch (inst->opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: return 1 * c->dispatch_width / 8; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: return 2 * c->dispatch_width / 8; case FS_OPCODE_TEX: case FS_OPCODE_TXB: @@ -525,13 +525,13 @@ fs_inst * fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src) { switch (opcode) { - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: break; default: assert(!"not reached: bad math opcode"); @@ -570,7 +570,7 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, fs_reg src0, fs_reg src1) int base_mrf = 2; fs_inst *inst; - assert(opcode == FS_OPCODE_POW); + assert(opcode == SHADER_OPCODE_POW); if (intel->gen >= 6) { /* Can't do hstride == 0 args to gen6 math, so 
expand it out. @@ -1135,7 +1135,7 @@ fs_visitor::propagate_constants() } break; - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: /* The hardware doesn't do math on immediate values * (because why are you doing that, seriously?), but * the correct answer is to just constant fold it diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index d207ac27aa2..94af0e1af16 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -296,14 +296,14 @@ public: bool is_math() { - return (opcode == FS_OPCODE_RCP || - opcode == FS_OPCODE_RSQ || - opcode == FS_OPCODE_SQRT || - opcode == FS_OPCODE_EXP2 || - opcode == FS_OPCODE_LOG2 || - opcode == FS_OPCODE_SIN || - opcode == FS_OPCODE_COS || - opcode == FS_OPCODE_POW); + return (opcode == SHADER_OPCODE_RCP || + opcode == SHADER_OPCODE_RSQ || + opcode == SHADER_OPCODE_SQRT || + opcode == SHADER_OPCODE_EXP2 || + opcode == SHADER_OPCODE_LOG2 || + opcode == SHADER_OPCODE_SIN || + opcode == SHADER_OPCODE_COS || + opcode == SHADER_OPCODE_POW); } enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 529df0880f0..285ba46bd46 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -149,28 +149,28 @@ fs_visitor::generate_math(fs_inst *inst, int op; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: op = BRW_MATH_FUNCTION_INV; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: op = BRW_MATH_FUNCTION_RSQ; break; - case FS_OPCODE_SQRT: + case SHADER_OPCODE_SQRT: op = BRW_MATH_FUNCTION_SQRT; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: op = BRW_MATH_FUNCTION_EXP; break; - case FS_OPCODE_LOG2: + case SHADER_OPCODE_LOG2: op = BRW_MATH_FUNCTION_LOG; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: op = BRW_MATH_FUNCTION_POW; break; - case FS_OPCODE_SIN: + case SHADER_OPCODE_SIN: op = 
BRW_MATH_FUNCTION_SIN; break; - case FS_OPCODE_COS: + case SHADER_OPCODE_COS: op = BRW_MATH_FUNCTION_COS; break; default: @@ -182,7 +182,7 @@ fs_visitor::generate_math(fs_inst *inst, if (intel->gen >= 6) { assert(inst->mlen == 0); - if (inst->opcode == FS_OPCODE_POW) { + if (inst->opcode == SHADER_OPCODE_POW) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src[0], src[1]); @@ -775,14 +775,14 @@ fs_visitor::generate_code() } break; - case FS_OPCODE_RCP: - case FS_OPCODE_RSQ: - case FS_OPCODE_SQRT: - case FS_OPCODE_EXP2: - case FS_OPCODE_LOG2: - case FS_OPCODE_POW: - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_RCP: + case SHADER_OPCODE_RSQ: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_EXP2: + case SHADER_OPCODE_LOG2: + case SHADER_OPCODE_POW: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: generate_math(inst, dst, src); break; case FS_OPCODE_PIXEL_X: diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 965a5b333a2..0ea4e5c36f0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -69,26 +69,26 @@ public: int math_latency = 22; switch (inst->opcode) { - case FS_OPCODE_RCP: + case SHADER_OPCODE_RCP: this->latency = 1 * chans * math_latency; break; - case FS_OPCODE_RSQ: + case SHADER_OPCODE_RSQ: this->latency = 2 * chans * math_latency; break; - case FS_OPCODE_SQRT: - case FS_OPCODE_LOG2: + case SHADER_OPCODE_SQRT: + case SHADER_OPCODE_LOG2: /* full precision log. partial is 2. */ this->latency = 3 * chans * math_latency; break; - case FS_OPCODE_EXP2: + case SHADER_OPCODE_EXP2: /* full precision. partial is 3, same throughput. 
*/ this->latency = 4 * chans * math_latency; break; - case FS_OPCODE_POW: + case SHADER_OPCODE_POW: this->latency = 8 * chans * math_latency; break; - case FS_OPCODE_SIN: - case FS_OPCODE_COS: + case SHADER_OPCODE_SIN: + case SHADER_OPCODE_COS: /* minimum latency, max is 12 rounds. */ this->latency = 5 * chans * math_latency; break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2e3f9be75b4..8b4f5bbac15 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -250,14 +250,14 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_rcp: - emit_math(FS_OPCODE_RCP, this->result, op[0]); + emit_math(SHADER_OPCODE_RCP, this->result, op[0]); break; case ir_unop_exp2: - emit_math(FS_OPCODE_EXP2, this->result, op[0]); + emit_math(SHADER_OPCODE_EXP2, this->result, op[0]); break; case ir_unop_log2: - emit_math(FS_OPCODE_LOG2, this->result, op[0]); + emit_math(SHADER_OPCODE_LOG2, this->result, op[0]); break; case ir_unop_exp: case ir_unop_log: @@ -265,11 +265,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sin: case ir_unop_sin_reduced: - emit_math(FS_OPCODE_SIN, this->result, op[0]); + emit_math(SHADER_OPCODE_SIN, this->result, op[0]); break; case ir_unop_cos: case ir_unop_cos_reduced: - emit_math(FS_OPCODE_COS, this->result, op[0]); + emit_math(SHADER_OPCODE_COS, this->result, op[0]); break; case ir_unop_dFdx: @@ -340,11 +340,11 @@ fs_visitor::visit(ir_expression *ir) break; case ir_unop_sqrt: - emit_math(FS_OPCODE_SQRT, this->result, op[0]); + emit_math(SHADER_OPCODE_SQRT, this->result, op[0]); break; case ir_unop_rsq: - emit_math(FS_OPCODE_RSQ, this->result, op[0]); + emit_math(SHADER_OPCODE_RSQ, this->result, op[0]); break; case ir_unop_i2u: @@ -423,7 +423,7 @@ fs_visitor::visit(ir_expression *ir) break; case ir_binop_pow: - emit_math(FS_OPCODE_POW, this->result, op[0], op[1]); + emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); 
break; case ir_unop_bit_not: @@ -1694,7 +1694,7 @@ fs_visitor::emit_interpolation_setup_gen4() interp_reg(FRAG_ATTRIB_WPOS, 3)); /* Compute the pixel 1/W value from wpos.w. */ this->pixel_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w); + emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w); this->current_annotation = NULL; } @@ -1731,7 +1731,7 @@ fs_visitor::emit_interpolation_setup_gen6() this->current_annotation = "compute pos.w"; this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0)); this->wpos_w = fs_reg(this, glsl_type::float_type); - emit_math(FS_OPCODE_RCP, this->wpos_w, this->pixel_w); + emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w); this->delta_x = fs_reg(brw_vec8_grf(2, 0)); this->delta_y = fs_reg(brw_vec8_grf(3, 0)); -- cgit v1.2.3 From ecf8963754489abfb5097c130a9bcd4cdb76b6bd Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Sun, 19 Jun 2011 01:47:50 -0700 Subject: i965/fs: Implement textureSize (TXS) on Gen5+. Signed-off-by: Kenneth Graunke Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_defines.h | 2 ++ src/mesa/drivers/dri/i965/brw_fs.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs.h | 3 ++- src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 4 ++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 28 +++++++++++++++++++++------- src/mesa/program/ir_to_mesa.cpp | 7 +++++-- 6 files changed, 35 insertions(+), 10 deletions(-) (limited to 'src/mesa/drivers/dri/i965/brw_fs.h') diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index b740d87c933..69e0026ee6b 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -634,6 +634,7 @@ enum opcode { FS_OPCODE_TXB, FS_OPCODE_TXD, FS_OPCODE_TXL, + FS_OPCODE_TXS, FS_OPCODE_DISCARD, FS_OPCODE_SPILL, FS_OPCODE_UNSPILL, @@ -781,6 +782,7 @@ enum opcode { #define GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS 4 #define GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE 5 #define 
GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE 6 +#define GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO 10 /* for GEN5 only */ #define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0 diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index c8f74252654..0b0445ea142 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -157,6 +157,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst) case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: return 1; case FS_OPCODE_FB_WRITE: return 2; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 94af0e1af16..10f45f30fe9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -291,7 +291,8 @@ public: return (opcode == FS_OPCODE_TEX || opcode == FS_OPCODE_TXB || opcode == FS_OPCODE_TXD || - opcode == FS_OPCODE_TXL); + opcode == FS_OPCODE_TXL || + opcode == FS_OPCODE_TXS); } bool is_math() diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 482d250c333..5c057e9a00b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -242,6 +242,9 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; } break; + case FS_OPCODE_TXS: + msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; + break; case FS_OPCODE_TXD: /* There is no sample_d_c message; comparisons are done manually */ msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; @@ -775,6 +778,7 @@ fs_visitor::generate_code() case FS_OPCODE_TXB: case FS_OPCODE_TXD: case FS_OPCODE_TXL: + case FS_OPCODE_TXS: generate_tex(inst, dst, src[0]); break; case FS_OPCODE_DISCARD: diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 792799d9063..3551e3dfe81 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -751,6 +751,8 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, int base_mrf = 2; int reg_width = c->dispatch_width / 8; bool header_present = false; + const int vector_elements = + ir->coordinate ? ir->coordinate->type->vector_elements : 0; if (ir->offset) { /* The offsets set up by the ir_texture visitor are in the @@ -761,7 +763,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, base_mrf--; } - for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { + for (int i = 0; i < vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * reg_width), coordinate); @@ -769,7 +771,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst->saturate = true; coordinate.reg_offset++; } - mlen += ir->coordinate->type->vector_elements * reg_width; + mlen += vector_elements * reg_width; if (ir->shadow_comparitor && ir->op != ir_txd) { mlen = MAX2(mlen, header_present + 4 * reg_width); @@ -837,8 +839,14 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, inst = emit(FS_OPCODE_TXD, dst); break; } - case ir_txf: case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + inst = emit(FS_OPCODE_TXS, dst); + break; + case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } @@ -927,14 +935,19 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, } break; } - case ir_txf: case ir_txs: + this->result = reg_undef; + ir->lod_info.lod->accept(this); + emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), this->result); + mlen += reg_width; + break; + case ir_txf: assert(!"GLSL 1.30 features unsupported"); break; } /* Set up the coordinate (except for TXD where it was done earlier) */ - if (ir->op != 
ir_txd) { + if (ir->op != ir_txd && ir->op != ir_txs) { for (int i = 0; i < ir->coordinate->type->vector_elements; i++) { fs_inst *inst = emit(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate); @@ -953,7 +966,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, case ir_txl: inst = emit(FS_OPCODE_TXL, dst); break; case ir_txd: inst = emit(FS_OPCODE_TXD, dst); break; case ir_txf: assert(!"TXF unsupported."); break; - case ir_txs: assert(!"TXS unsupported."); break; + case ir_txs: inst = emit(FS_OPCODE_TXS, dst); break; } inst->base_mrf = base_mrf; inst->mlen = mlen; @@ -988,7 +1001,8 @@ fs_visitor::visit(ir_texture *ir) } this->result = reg_undef; - ir->coordinate->accept(this); + if (ir->coordinate) + ir->coordinate->accept(this); fs_reg coordinate = this->result; if (ir->offset != NULL) { diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index b222005d1a9..e7609df19ee 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -2104,7 +2104,10 @@ ir_to_mesa_visitor::visit(ir_texture *ir) ir_to_mesa_instruction *inst = NULL; prog_opcode opcode = OPCODE_NOP; - ir->coordinate->accept(this); + if (ir->op == ir_txs) + this->result = src_reg_for_float(0.0); + else + ir->coordinate->accept(this); /* Put our coords in a temp. We'll need to modify them for shadow, * projection, or LOD, so the only case we'd use it as is is if @@ -2128,6 +2131,7 @@ ir_to_mesa_visitor::visit(ir_texture *ir) switch (ir->op) { case ir_tex: + case ir_txs: opcode = OPCODE_TEX; break; case ir_txb: @@ -2148,7 +2152,6 @@ ir_to_mesa_visitor::visit(ir_texture *ir) dy = this->result; break; case ir_txf: - case ir_txs: assert(!"GLSL 1.30 features unsupported"); break; } -- cgit v1.2.3