diff options
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.cpp | 15 +++++++++++++++
-rw-r--r--  src/mesa/drivers/dri/i965/brw_fs.h   | 22 +++++++++++++++++++++-
2 files changed, 36 insertions, 1 deletion
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 56659f3af51..3277b586ede 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2988,6 +2988,21 @@ fs_visitor::lower_load_payload()
 
          if (inst->src[i].file == BAD_FILE) {
             /* Do nothing but otherwise increment as normal */
+         } else if (dst.file == MRF &&
+                    dst.width == 8 &&
+                    brw->has_compr4 &&
+                    i + 4 < inst->sources &&
+                    inst->src[i + 4].equals(horiz_offset(inst->src[i], 8))) {
+            fs_reg compr4_dst = dst;
+            compr4_dst.reg += BRW_MRF_COMPR4;
+            compr4_dst.width = 16;
+            fs_reg compr4_src = inst->src[i];
+            compr4_src.width = 16;
+            fs_inst *mov = MOV(compr4_dst, compr4_src);
+            mov->force_writemask_all = true;
+            inst->insert_before(block, mov);
+            /* Mark i+4 as BAD_FILE so we don't emit a MOV for it */
+            inst->src[i + 4].file = BAD_FILE;
          } else {
             fs_inst *mov = MOV(dst, inst->src[i]);
             if (inst->src[i].file == GRF) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index d4d4aa13856..77d76807f37 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -144,6 +144,26 @@ byte_offset(fs_reg reg, unsigned delta)
 }
 
 static inline fs_reg
+horiz_offset(fs_reg reg, unsigned delta)
+{
+   switch (reg.file) {
+   case BAD_FILE:
+   case UNIFORM:
+   case IMM:
+      /* These only have a single component that is implicitly splatted. A
+       * horizontal offset should be a harmless no-op.
+       */
+      break;
+   case GRF:
+   case MRF:
+      return byte_offset(reg, delta * reg.stride * type_sz(reg.type));
+   default:
+      assert(delta == 0);
+   }
+   return reg;
+}
+
+static inline fs_reg
 offset(fs_reg reg, unsigned delta)
 {
    assert(reg.stride > 0);
@@ -184,7 +204,7 @@ half(fs_reg reg, unsigned idx)
    assert(idx == 0 || (reg.file != HW_REG && reg.file != IMM));
    assert(reg.width == 16);
    reg.width = 8;
-   return byte_offset(reg, 8 * idx * reg.stride * type_sz(reg.type));
+   return horiz_offset(reg, 8 * idx);
 }
 
 static const fs_reg reg_undef;