diff options
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 67 |
2 files changed, 68 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 87b5ff98f8c..e2aa1097347 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -251,6 +251,7 @@ vec4_instruction::can_do_writemask(const struct brw_device_info *devinfo) case TES_OPCODE_CREATE_INPUT_READ_HEADER: case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: case VEC4_OPCODE_URB_READ: + case SHADER_OPCODE_MOV_INDIRECT: return false; default: /* The MATH instruction on Gen6 only executes in align1 mode, which does diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 8409e820f09..4b44c3a63a7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1399,6 +1399,69 @@ generate_set_simd4x2_header_gen9(struct brw_codegen *p, } static void +generate_mov_indirect(struct brw_codegen *p, + vec4_instruction *inst, + struct brw_reg dst, struct brw_reg reg, + struct brw_reg indirect, struct brw_reg length) +{ + assert(indirect.type == BRW_REGISTER_TYPE_UD); + assert(p->devinfo->gen >= 6); + + unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr * (REG_SIZE / 2); + + /* This instruction acts in align1 mode */ + assert(dst.writemask == WRITEMASK_XYZW); + + /* Overview: dst receives the data located at reg displaced by the byte offset held in indirect. imm_byte_offset starts out as the byte offset of reg itself, with subnr counted in half registers. A constant indirect is folded into that offset and emitted as one plain MOV; otherwise the address is formed in a0 and a MOV from brw_VxH_indirect(0, 0) is emitted. The inst and length parameters are not referenced anywhere in this body. */ if (indirect.file == BRW_IMMEDIATE_VALUE) { + /* Constant index: add it to the byte offset, then re-derive nr and subnr from the combined offset. */ imm_byte_offset += indirect.ud; + + reg.nr = imm_byte_offset / REG_SIZE; + reg.subnr = (imm_byte_offset / (REG_SIZE / 2)) % 2; + unsigned shift = (imm_byte_offset / 4) % 4; + /* shift is the dword index of the offset within its group of four dwords; it is added to every swizzle component. NOTE(review): presumably callers guarantee that no component plus shift exceeds 3, since a plain += would otherwise carry into the neighbouring field - TODO confirm. */ reg.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift); + + brw_MOV(p, dst, reg); + } else { + /* Variable index: the align1 and mask-disabled defaults set below are bracketed by the push and pop of the instruction state. */ brw_push_insn_state(p); + brw_set_default_access_mode(p, BRW_ALIGN_1); + brw_set_default_mask_control(p, BRW_MASK_DISABLE); + + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* We need to move the indirect value into the address register. 
In + * order to make things make some sense, we want to respect at least the + * X component of the swizzle. In order to do that, we need to convert + * the subnr (probably 0) to an align1 subnr and add in the swizzle. + */ + assert(brw_is_single_value_swizzle(indirect.swizzle)); + indirect.subnr = (indirect.subnr * 4 + BRW_GET_SWZ(indirect.swizzle, 0)); + + /* We then use a region of <8,4,0>:uw to pick off the first 2 bytes of + * the indirect and splat it out to all four channels of the given half + * of a0. The subnr is doubled first because the register is retyped from UD to UW just below (presumably so that subnr keeps naming the same dword - TODO confirm), and the constant byte offset of reg is added in the same ADD. + */ + indirect.subnr *= 2; + indirect = stride(retype(indirect, BRW_REGISTER_TYPE_UW), 8, 4, 0); + brw_ADD(p, addr, indirect, brw_imm_uw(imm_byte_offset)); + + /* Now we need to incorporate the swizzle from the source register. Nibble i of uv_swiz ends up holding 4 * (swizzle component i), and the low 16 bits are then mirrored into the high 16 bits. Presumably brw_imm_uv is a packed immediate of eight 4-bit values, one per address channel, so each channel gets the byte offset of its selected component - TODO confirm. The ADD is skipped for an XXXX swizzle. */ + if (reg.swizzle != BRW_SWIZZLE_XXXX) { + uint32_t uv_swiz = BRW_GET_SWZ(reg.swizzle, 0) << 2 | + BRW_GET_SWZ(reg.swizzle, 1) << 6 | + BRW_GET_SWZ(reg.swizzle, 2) << 10 | + BRW_GET_SWZ(reg.swizzle, 3) << 14; + uv_swiz |= uv_swiz << 16; + + brw_ADD(p, addr, addr, brw_imm_uv(uv_swiz)); + } + + /* Read the source through the address register, keeping the type of reg. */ brw_MOV(p, dst, retype(brw_VxH_indirect(0, 0), reg.type)); + + brw_pop_insn_state(p); + } +} + +static void generate_code(struct brw_codegen *p, const struct brw_compiler *compiler, void *log_data, @@ -1946,6 +2009,10 @@ generate_code(struct brw_codegen *p, brw_WAIT(p); break; + case SHADER_OPCODE_MOV_INDIRECT: + generate_mov_indirect(p, inst, dst, src[0], src[1], src[2]); + break; + default: unreachable("Unsupported opcode"); } |