diff options
author | Eric Anholt <[email protected]> | 2015-08-18 22:38:34 -0700 |
---|---|---|
committer | Eric Anholt <[email protected]> | 2015-10-23 18:11:21 +0100 |
commit | 5b2fb138bc5ff68af27d8435cbc01f83a09ee078 (patch) | |
tree | 0a3076f2f146896b097b387b99c39d853242adcd /src/glsl/nir/nir_opcodes.py | |
parent | 1066a372d8a260aef29ffb6226e8691c07ec696a (diff) |
nir: Add opcodes for saturated vector math.
This corresponds to instructions used on vc4 for its blending inside of
shaders. I've seen these opcodes on other architectures before, but I
think it's the first time these are needed in Mesa.
v2: Rename to 'u' instead of 'i', since they're all 'u'norm (from review
by jekstrand)
Diffstat (limited to 'src/glsl/nir/nir_opcodes.py')
-rw-r--r-- | src/glsl/nir/nir_opcodes.py | 45 |
1 file changed, 45 insertions, 0 deletions
```diff
diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py
index f2d584fe484..3c0f1da94af 100644
--- a/src/glsl/nir/nir_opcodes.py
+++ b/src/glsl/nir/nir_opcodes.py
@@ -468,6 +468,51 @@ binop("fmax", tfloat, "", "fmaxf(src0, src1)")
 binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
 binop("umax", tunsigned, commutative + associative, "src1 > src0 ? src1 : src0")
 
+# Saturated vector add for 4 8bit ints.
+binop("usadd_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
+}
+""")
+
+# Saturated vector subtract for 4 8bit ints.
+binop("ussub_4x8", tint, "", """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   int src0_chan = (src0 >> i) & 0xff;
+   int src1_chan = (src1 >> i) & 0xff;
+   if (src0_chan > src1_chan)
+      dst |= (src0_chan - src1_chan) << i;
+}
+""")
+
+# vector min for 4 8bit ints.
+binop("umin_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# vector max for 4 8bit ints.
+binop("umax_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
+}
+""")
+
+# unorm multiply: (a * b) / 255.
+binop("umul_unorm_4x8", tint, commutative + associative, """
+dst = 0;
+for (int i = 0; i < 32; i += 8) {
+   int src0_chan = (src0 >> i) & 0xff;
+   int src1_chan = (src1 >> i) & 0xff;
+   dst |= ((src0_chan * src1_chan) / 255) << i;
+}
+""")
+
 binop("fpow", tfloat, "", "powf(src0, src1)")
 
 binop_horiz("pack_half_2x16_split", 1, tunsigned, 1, tfloat, 1, tfloat,
```