aboutsummaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
diff options
context:
space:
mode:
authorJason Ekstrand <[email protected]>2015-03-24 17:00:04 -0700
committerJason Ekstrand <[email protected]>2015-05-06 10:29:30 -0700
commit41868bb6824c6106a55c8442006c1e2215abf567 (patch)
tree6156ad5d3db817836a5aab2bb2b6ed6364c8d8a1 /src/mesa/drivers/dri/i965/brw_fs_cse.cpp
parent94ee908448405c8271e8662914a1c49df8d623b2 (diff)
i965/fs: Rework the fs_visitor LOAD_PAYLOAD instruction
The newly reworked instruction is far more straightforward than the original. Before, the LOAD_PAYLOAD instruction was lowered by a the complicated and broken-by-design pile of heuristics to try and guess force_writemask_all, exec_size, and a number of other factors on the sources. Instead, we use the header_size on the instruction to denote which sources are "header sources". Header sources are required to be a single physical hardware register that is copied verbatim. The registers that follow are considered the actual payload registers and have a width that correspond's to the LOAD_PAYLOAD's exec_size and are treated as being per-channel. This gives us a fairly straightforward lowering: 1) All header sources are copied directly using force_writemask_all and, since they are guaranteed to be a single register, there are no force_sechalf issues. 2) All non-header sources are copied using the exact same force_sechalf and force_writemask_all modifiers as the LOAD_PAYLOAD operation itself. 3) In order to accommodate older gens that need interleaved colors, lower_load_payload detects when the destination is a COMPR4 register and automatically interleaves the non-header sources. The lower_load_payload pass does the right thing here regardless of whether or not the hardware actually supports COMPR4. This patch commit itself is made up of a bunch of smaller changes squashed together. Individual change descriptions follow: i965/fs: Rework fs_visitor::LOAD_PAYLOAD We rework LOAD_PAYLOAD to verify that all of the sources that count as headers are, indeed, exactly one register and that all of the non-header sources match the destination width. We then take the exec_size for LOAD_PAYLOAD directly from the destination width. i965/fs: Make destinations of load_payload have the appropreate width i965/fs: Rework fs_visitor::lower_load_payload v2: Don't allow the saturate flag on LOAD_PAYLOAD instructions i965/fs_cse: Support the new-style LOAD_PAYLOAD i965/fs_inst::is_copy_payload: Support the new-style LOAD_PAYLOAD i965/fs: Simplify setup_color_payload Previously, setup_color_payload was a a big helper function that did a lot of gen-specific special casing for setting up the color sources of the LOAD_PAYLOAD instruction. Now that lower_load_payload is much more sane, most of that complexity isn't needed anymore. Instead, we can do a simple fixup pass for color clamps and then just stash sources directly in the LOAD_PAYLOAD. We can trust lower_load_payload to do the right thing with respect to COMPR4. Reviewed-by: Kenneth Graunke <[email protected]>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_cse.cpp')
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_cse.cpp31
1 files changed, 24 insertions, 7 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index a582e6a8e4b..db01f8cf7ab 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -174,17 +174,34 @@ create_copy_instr(fs_visitor *v, fs_inst *inst, fs_reg src, bool negate)
{
int written = inst->regs_written;
int dst_width = inst->dst.width / 8;
- fs_reg dst = inst->dst;
fs_inst *copy;
if (written > dst_width) {
- fs_reg *sources = ralloc_array(v->mem_ctx, fs_reg, written / dst_width);
- for (int i = 0; i < written / dst_width; i++)
- sources[i] = offset(src, i);
- copy = v->LOAD_PAYLOAD(dst, sources, written / dst_width,
- inst->header_size);
+ fs_reg *payload;
+ int sources, header_size;
+ if (inst->opcode == SHADER_OPCODE_LOAD_PAYLOAD) {
+ sources = inst->sources;
+ header_size = inst->header_size;
+ } else {
+ assert(written % dst_width == 0);
+ sources = written / dst_width;
+ header_size = 0;
+ }
+
+ assert(src.file == GRF);
+ payload = ralloc_array(v->mem_ctx, fs_reg, sources);
+ for (int i = 0; i < header_size; i++) {
+ payload[i] = src;
+ payload[i].width = 8;
+ src.reg_offset++;
+ }
+ for (int i = header_size; i < sources; i++) {
+ payload[i] = src;
+ src = offset(src, 1);
+ }
+ copy = v->LOAD_PAYLOAD(inst->dst, payload, sources, header_size);
} else {
- copy = v->MOV(dst, src);
+ copy = v->MOV(inst->dst, src);
copy->force_writemask_all = inst->force_writemask_all;
copy->src[0].negate = negate;
}