diff options
author | Iago Toral Quiroga <[email protected]> | 2016-05-31 10:17:37 +0200 |
---|---|---|
committer | Samuel Iglesias Gonsálvez <[email protected]> | 2017-01-03 11:26:50 +0100 |
commit | 6979e5a41241993b9e7bedea80f29fb43d96aa47 (patch) | |
tree | 14ca00eb1310909754e50c21f04f4385443ad062 | |
parent | 9b6174dffa4c085a0b7f66db6c46b831c5c91f0b (diff) |
i965/vec4: add VEC4_OPCODE_PICK_{LOW,HIGH}_32BIT opcodes
These opcodes will pick the low/high 32-bit in each 64-bit data element
using Align1 mode. We will use this, for example, to do things like
unpackDouble2x32.
We use Align1 mode because in order to implement this in Align16 mode
we would need to use 32-bit logical swizzles (XZ for low, YW for high),
but the IR works in terms of 64-bit logical swizzles for DF operands
all the way up to codegen.
v2:
- use suboffset() instead of get_element_ud()
- no need to set the width on the dst
Reviewed-by: Ian Romanick <[email protected]>
Reviewed-by: Matt Turner <[email protected]>
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_defines.h | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_shader.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 25 |
4 files changed, 35 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 91d9d5225b9..6c981cc5518 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1100,6 +1100,8 @@ enum opcode { VEC4_OPCODE_UNPACK_UNIFORM, VEC4_OPCODE_DOUBLE_TO_FLOAT, VEC4_OPCODE_FLOAT_TO_DOUBLE, + VEC4_OPCODE_PICK_LOW_32BIT, + VEC4_OPCODE_PICK_HIGH_32BIT, FS_OPCODE_DDX_COARSE, FS_OPCODE_DDX_FINE, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index ec321ae1910..bea65050211 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -326,6 +326,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "double_to_float"; case VEC4_OPCODE_FLOAT_TO_DOUBLE: return "float_to_double"; + case VEC4_OPCODE_PICK_LOW_32BIT: + return "pick_low_32bit"; + case VEC4_OPCODE_PICK_HIGH_32BIT: + return "pick_high_32bit"; case FS_OPCODE_DDX_COARSE: return "ddx_coarse"; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 4286a6c78aa..d70b8db244e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -255,6 +255,8 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo) case SHADER_OPCODE_GEN4_SCRATCH_READ: case VEC4_OPCODE_DOUBLE_TO_FLOAT: case VEC4_OPCODE_FLOAT_TO_DOUBLE: + case VEC4_OPCODE_PICK_LOW_32BIT: + case VEC4_OPCODE_PICK_HIGH_32BIT: case VS_OPCODE_PULL_CONSTANT_LOAD: case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: @@ -510,6 +512,8 @@ vec4_visitor::opt_reduce_swizzle() case VEC4_OPCODE_FLOAT_TO_DOUBLE: case VEC4_OPCODE_DOUBLE_TO_FLOAT: + case VEC4_OPCODE_PICK_LOW_32BIT: + case VEC4_OPCODE_PICK_HIGH_32BIT: swizzle = brw_swizzle_for_size(4); break; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index 7864be1f733..0a962e05d1e 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1953,6 +1953,31 @@ generate_code(struct brw_codegen *p, break; } + case VEC4_OPCODE_PICK_LOW_32BIT: + case VEC4_OPCODE_PICK_HIGH_32BIT: { + /* Stores the low/high 32-bit of each 64-bit element in src[0] into + * dst using ALIGN1 mode and a <8,4,2>:UD region on the source. + */ + assert(type_sz(src[0].type) == 8); + assert(type_sz(dst.type) == 4); + + brw_set_default_access_mode(p, BRW_ALIGN_1); + + dst = retype(dst, BRW_REGISTER_TYPE_UD); + dst.hstride = BRW_HORIZONTAL_STRIDE_1; + + src[0] = retype(src[0], BRW_REGISTER_TYPE_UD); + if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT) + src[0] = suboffset(src[0], 1); + src[0].vstride = BRW_VERTICAL_STRIDE_8; + src[0].width = BRW_WIDTH_4; + src[0].hstride = BRW_HORIZONTAL_STRIDE_2; + brw_MOV(p, dst, src[0]); + + brw_set_default_access_mode(p, BRW_ALIGN_16); + break; + } + case VEC4_OPCODE_PACK_BYTES: { /* Is effectively: * |