TGSI Instruction Specification ============================== ============================== 1 Instruction Set Operations ============================= 1.1 GL_NV_vertex_program ------------------------- 1.1.1 ARL - Address Register Load dst.x = floor(src.x) dst.y = floor(src.y) dst.z = floor(src.z) dst.w = floor(src.w) 1.1.2 MOV - Move dst.x = src.x dst.y = src.y dst.z = src.z dst.w = src.w 1.1.3 LIT - Light Coefficients dst.x = 1.0 dst.y = max(src.x, 0.0) dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0 dst.w = 1.0 1.1.4 RCP - Reciprocal dst.x = 1.0 / src.x dst.y = 1.0 / src.x dst.z = 1.0 / src.x dst.w = 1.0 / src.x 1.1.5 RSQ - Reciprocal Square Root dst.x = 1.0 / sqrt(abs(src.x)) dst.y = 1.0 / sqrt(abs(src.x)) dst.z = 1.0 / sqrt(abs(src.x)) dst.w = 1.0 / sqrt(abs(src.x)) 1.1.6 EXP - Approximate Exponential Base 2 dst.x = pow(2.0, floor(src.x)) dst.y = src.x - floor(src.x) dst.z = pow(2.0, src.x) dst.w = 1.0 1.1.7 LOG - Approximate Logarithm Base 2 dst.x = floor(lg2(abs(src.x))) dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x)))) dst.z = lg2(abs(src.x)) dst.w = 1.0 1.1.8 MUL - Multiply dst.x = src0.x * src1.x dst.y = src0.y * src1.y dst.z = src0.z * src1.z dst.w = src0.w * src1.w 1.1.9 ADD - Add dst.x = src0.x + src1.x dst.y = src0.y + src1.y dst.z = src0.z + src1.z dst.w = src0.w + src1.w 1.1.10 DP3 - 3-component Dot Product dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z 1.1.11 DP4 - 4-component Dot Product dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w 1.1.12 DST - Distance Vector 
dst.x = 1.0 dst.y = src0.y * src1.y dst.z = src0.z dst.w = src1.w 1.1.13 MIN - Minimum dst.x = min(src0.x, src1.x) dst.y = min(src0.y, src1.y) dst.z = min(src0.z, src1.z) dst.w = min(src0.w, src1.w) 1.1.14 MAX - Maximum dst.x = max(src0.x, src1.x) dst.y = max(src0.y, src1.y) dst.z = max(src0.z, src1.z) dst.w = max(src0.w, src1.w) 1.1.15 SLT - Set On Less Than dst.x = (src0.x < src1.x) ? 1.0 : 0.0 dst.y = (src0.y < src1.y) ? 1.0 : 0.0 dst.z = (src0.z < src1.z) ? 1.0 : 0.0 dst.w = (src0.w < src1.w) ? 1.0 : 0.0 1.1.16 SGE - Set On Greater Equal Than dst.x = (src0.x >= src1.x) ? 1.0 : 0.0 dst.y = (src0.y >= src1.y) ? 1.0 : 0.0 dst.z = (src0.z >= src1.z) ? 1.0 : 0.0 dst.w = (src0.w >= src1.w) ? 1.0 : 0.0 1.1.17 MAD - Multiply And Add dst.x = src0.x * src1.x + src2.x dst.y = src0.y * src1.y + src2.y dst.z = src0.z * src1.z + src2.z dst.w = src0.w * src1.w + src2.w 1.2 GL_ATI_fragment_shader --------------------------- 1.2.1 SUB - Subtract dst.x = src0.x - src1.x dst.y = src0.y - src1.y dst.z = src0.z - src1.z dst.w = src0.w - src1.w 1.2.2 DOT3 - 3-component Dot Product Alias for DP3. 1.2.3 DOT4 - 4-component Dot Product Alias for DP4. 1.2.4 LERP - Linear Interpolate dst.x = src0.x * (src1.x - src2.x) + src2.x dst.y = src0.y * (src1.y - src2.y) + src2.y dst.z = src0.z * (src1.z - src2.z) + src2.z dst.w = src0.w * (src1.w - src2.w) + src2.w 1.2.5 CND - Condition dst.x = (src2.x > 0.5) ? src0.x : src1.x dst.y = (src2.y > 0.5) ? src0.y : src1.y dst.z = (src2.z > 0.5) ? src0.z : src1.z dst.w = (src2.w > 0.5) ? src0.w : src1.w 1.2.6 CND0 - Condition Zero dst.x = (src2.x >= 0.0) ? src0.x : src1.x dst.y = (src2.y >= 0.0) ? src0.y : src1.y dst.z = (src2.z >= 0.0) ? src0.z : src1.z dst.w = (src2.w >= 0.0) ? 
src0.w : src1.w 1.2.7 DOT2ADD - 2-component Dot Product And Add dst.x = src0.x * src1.x + src0.y * src1.y + src2.x dst.y = src0.x * src1.x + src0.y * src1.y + src2.x dst.z = src0.x * src1.x + src0.y * src1.y + src2.x dst.w = src0.x * src1.x + src0.y * src1.y + src2.x 1.3 GL_EXT_vertex_shader ------------------------- 1.3.1 INDEX - Array Lookup Considered for removal from language. 1.3.2 NEGATE - Negate Considered for removal from language. 1.3.3 MADD - Multiply And Add Alias for MAD. 1.3.4 FRAC - Fraction dst.x = src.x - floor(src.x) dst.y = src.y - floor(src.y) dst.z = src.z - floor(src.z) dst.w = src.w - floor(src.w) 1.3.5 SETGE - Set On Greater Equal Alias for SGE. 1.3.6 SETLT - Set On Less Than Alias for SLT. 1.3.7 CLAMP - Clamp dst.x = clamp(src0.x, src1.x, src2.x) dst.y = clamp(src0.y, src1.y, src2.y) dst.z = clamp(src0.z, src1.z, src2.z) dst.w = clamp(src0.w, src1.w, src2.w) 1.3.8 FLOOR - Floor dst.x = floor(src.x) dst.y = floor(src.y) dst.z = floor(src.z) dst.w = floor(src.w) 1.3.9 ROUND - Round dst.x = round(src.x) dst.y = round(src.y) dst.z = round(src.z) dst.w = round(src.w) 1.3.10 EXPBASE2 - Exponent Base 2 dst.x = pow(2.0, src.x) dst.y = pow(2.0, src.x) dst.z = pow(2.0, src.x) dst.w = pow(2.0, src.x) 1.3.11 LOGBASE2 - Logarithm Base 2 dst.x = lg2(src.x) dst.y = lg2(src.x) dst.z = lg2(src.x) dst.w = lg2(src.x) 1.3.12 POWER - Power dst.x = pow(src0.x, src1.x) dst.y = pow(src0.x, src1.x) dst.z = pow(src0.x, src1.x) dst.w = pow(src0.x, src1.x) 1.3.13 RECIP - Reciprocal Alias for RCP. 1.3.14 RECIPSQRT - Reciprocal Square Root Alias for RSQ. 1.3.15 CROSSPRODUCT - Cross Product dst.x = src0.y * src1.z - src1.y * src0.z dst.y = src0.z * src1.x - src1.z * src0.x dst.z = src0.x * src1.y - src1.x * src0.y dst.w = 1.0 1.3.16 MULTIPLYMATRIX - Multiply Matrix Considered for removal from language. 
1.4 GL_NV_vertex_program1_1 ---------------------------- 1.4.1 ABS - Absolute dst.x = abs(src.x) dst.y = abs(src.y) dst.z = abs(src.z) dst.w = abs(src.w) 1.4.2 RCC - Reciprocal Clamped dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020) 1.4.3 DPH - Homogeneous Dot Product dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w 1.5 GL_NV_fragment_program --------------------------- 1.5.1 COS - Cosine dst.x = cos(src.x) dst.y = cos(src.x) dst.z = cos(src.x) dst.w = cos(src.x) 1.5.2 DDX - Derivative Relative To X dst.x = partialx(src.x) dst.y = partialx(src.y) dst.z = partialx(src.z) dst.w = partialx(src.w) 1.5.3 DDY - Derivative Relative To Y dst.x = partialy(src.x) dst.y = partialy(src.y) dst.z = partialy(src.z) dst.w = partialy(src.w) 1.5.4 EX2 - Exponential Base 2 Alias for EXPBASE2. 1.5.5 FLR - Floor Alias for FLOOR. 1.5.6 FRC - Fraction Alias for FRAC. 1.5.7 KILP - Predicated Discard TBD 1.5.8 LG2 - Logarithm Base 2 Alias for LOGBASE2. 1.5.9 LRP - Linear Interpolate Alias for LERP. 1.5.10 PK2H - Pack Two 16-bit Floats TBD 1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars TBD 1.5.12 PK4B - Pack Four Signed 8-bit Scalars TBD 1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars TBD 1.5.14 POW - Power Alias for POWER. 
1.5.15 RFL - Reflection Vector dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z dst.w = 1.0 1.5.16 SEQ - Set On Equal dst.x = (src0.x == src1.x) ? 1.0 : 0.0 dst.y = (src0.y == src1.y) ? 1.0 : 0.0 dst.z = (src0.z == src1.z) ? 1.0 : 0.0 dst.w = (src0.w == src1.w) ? 1.0 : 0.0 1.5.17 SFL - Set On False dst.x = 0.0 dst.y = 0.0 dst.z = 0.0 dst.w = 0.0 1.5.18 SGT - Set On Greater Than dst.x = (src0.x > src1.x) ? 1.0 : 0.0 dst.y = (src0.y > src1.y) ? 1.0 : 0.0 dst.z = (src0.z > src1.z) ? 1.0 : 0.0 dst.w = (src0.w > src1.w) ? 1.0 : 0.0 1.5.19 SIN - Sine dst.x = sin(src.x) dst.y = sin(src.x) dst.z = sin(src.x) dst.w = sin(src.x) 1.5.20 SLE - Set On Less Equal Than dst.x = (src0.x <= src1.x) ? 1.0 : 0.0 dst.y = (src0.y <= src1.y) ? 1.0 : 0.0 dst.z = (src0.z <= src1.z) ? 1.0 : 0.0 dst.w = (src0.w <= src1.w) ? 1.0 : 0.0 1.5.21 SNE - Set On Not Equal dst.x = (src0.x != src1.x) ? 1.0 : 0.0 dst.y = (src0.y != src1.y) ? 1.0 : 0.0 dst.z = (src0.z != src1.z) ? 1.0 : 0.0 dst.w = (src0.w != src1.w) ? 
1.0 : 0.0 1.5.22 STR - Set On True dst.x = 1.0 dst.y = 1.0 dst.z = 1.0 dst.w = 1.0 1.5.23 TEX - Texture Lookup TBD 1.5.24 TXD - Texture Lookup with Derivatives TBD 1.5.25 TXP - Projective Texture Lookup TBD 1.5.26 UP2H - Unpack Two 16-Bit Floats TBD 1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars TBD 1.5.28 UP4B - Unpack Four Signed 8-Bit Values TBD 1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars TBD 1.5.30 X2D - 2D Coordinate Transformation dst.x = src0.x + src1.x * src2.x + src1.y * src2.y dst.y = src0.y + src1.x * src2.z + src1.y * src2.w dst.z = src0.x + src1.x * src2.x + src1.y * src2.y dst.w = src0.y + src1.x * src2.z + src1.y * src2.w 1.6 GL_NV_vertex_program2 -------------------------- 1.6.1 ARA - Address Register Add TBD 1.6.2 ARR - Address Register Load With Round dst.x = round(src.x) dst.y = round(src.y) dst.z = round(src.z) dst.w = round(src.w) 1.6.3 BRA - Branch TBD 1.6.4 CAL - Subroutine Call push(pc) pc = target 1.6.5 RET - Subroutine Call Return pc = pop() 1.6.6 SSG - Set Sign dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 1.7 GL_ARB_vertex_program -------------------------- 1.7.1 SWZ - Extended Swizzle dst.x = src.x dst.y = src.y dst.z = src.z dst.w = src.w 1.7.2 XPD - Cross Product Alias for CROSSPRODUCT. 1.8 GL_ARB_fragment_program ---------------------------- 1.8.1 CMP - Compare dst.x = (src0.x < 0.0) ? src1.x : src2.x dst.y = (src0.y < 0.0) ? src1.y : src2.y dst.z = (src0.z < 0.0) ? src1.z : src2.z dst.w = (src0.w < 0.0) ? 
src1.w : src2.w 1.8.2 KIL - Conditional Discard TBD 1.8.3 SCS - Sine Cosine dst.x = cos(src.x) dst.y = sin(src.x) dst.z = 0.0 dst.w = 1.0 1.8.4 TXB - Texture Lookup With Bias TBD 1.9 GL_NV_fragment_program2 ---------------------------- 1.9.1 NRM - 3-component Vector Normalise dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z) dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z) dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z) dst.w = 1.0 1.9.2 DIV - Divide dst.x = src0.x / src1.x dst.y = src0.y / src1.y dst.z = src0.z / src1.z dst.w = src0.w / src1.w 1.9.3 DP2 - 2-component Dot Product dst.x = src0.x * src1.x + src0.y * src1.y dst.y = src0.x * src1.x + src0.y * src1.y dst.z = src0.x * src1.x + src0.y * src1.y dst.w = src0.x * src1.x + src0.y * src1.y 1.9.4 DP2A - 2-component Dot Product And Add Alias for DOT2ADD. 1.9.5 TXL - Texture Lookup With LOD TBD 1.9.6 BRK - Break TBD 1.9.7 IF - If TBD 1.9.8 LOOP - Loop TBD 1.9.9 REP - Repeat TBD 1.9.10 ELSE - Else TBD 1.9.11 ENDIF - End If TBD 1.9.12 ENDLOOP - End Loop TBD 1.9.13 ENDREP - End Repeat TBD 1.10 GL_NV_vertex_program3 --------------------------- 1.10.1 PUSHA - Push Address Register On Stack push(src.x) push(src.y) push(src.z) push(src.w) 1.10.2 POPA - Pop Address Register From Stack dst.w = pop() dst.z = pop() dst.y = pop() dst.x = pop() 1.11 GL_NV_gpu_program4 ------------------------ 1.11.1 CEIL - Ceiling dst.x = ceil(src.x) dst.y = ceil(src.y) dst.z = ceil(src.z) dst.w = ceil(src.w) 1.11.2 I2F - Integer To Float dst.x = (float) src.x dst.y = (float) src.y dst.z = (float) src.z dst.w = (float) src.w 1.11.3 NOT - Bitwise Not dst.x = ~src.x dst.y = ~src.y dst.z = ~src.z dst.w = ~src.w 1.11.4 TRUNC - Truncate dst.x = trunc(src.x) dst.y = trunc(src.y) dst.z = trunc(src.z) dst.w = trunc(src.w) 1.11.5 SHL - Shift Left dst.x = src0.x << src1.x dst.y = src0.y << src1.x dst.z = src0.z << src1.x dst.w = src0.w << src1.x 1.11.6 SHR - Shift Right dst.x = src0.x >> src1.x dst.y = 
src0.y >> src1.x dst.z = src0.z >> src1.x dst.w = src0.w >> src1.x 1.11.7 AND - Bitwise And dst.x = src0.x & src1.x dst.y = src0.y & src1.y dst.z = src0.z & src1.z dst.w = src0.w & src1.w 1.11.8 OR - Bitwise Or dst.x = src0.x | src1.x dst.y = src0.y | src1.y dst.z = src0.z | src1.z dst.w = src0.w | src1.w 1.11.9 MOD - Modulus dst.x = src0.x % src1.x dst.y = src0.y % src1.y dst.z = src0.z % src1.z dst.w = src0.w % src1.w 1.11.10 XOR - Bitwise Xor dst.x = src0.x ^ src1.x dst.y = src0.y ^ src1.y dst.z = src0.z ^ src1.z dst.w = src0.w ^ src1.w 1.11.11 SAD - Sum Of Absolute Differences dst.x = abs(src0.x - src1.x) + src2.x dst.y = abs(src0.y - src1.y) + src2.y dst.z = abs(src0.z - src1.z) + src2.z dst.w = abs(src0.w - src1.w) + src2.w 1.11.12 TXF - Texel Fetch TBD 1.11.13 TXQ - Texture Size Query TBD 1.11.14 CONT - Continue TBD 1.12 GL_NV_geometry_program4 ----------------------------- 1.12.1 EMIT - Emit TBD 1.12.2 ENDPRIM - End Primitive TBD 1.13 GLSL ---------- 1.13.1 BGNLOOP2 - Begin Loop TBD 1.13.2 BGNSUB - Begin Subroutine TBD 1.13.3 ENDLOOP2 - End Loop TBD 1.13.4 ENDSUB - End Subroutine TBD 1.13.5 INT - Truncate Alias for TRUNC. 1.13.6 NOISE1 - 1D Noise TBD 1.13.7 NOISE2 - 2D Noise TBD 1.13.8 NOISE3 - 3D Noise TBD 1.13.9 NOISE4 - 4D Noise TBD 1.13.10 NOP - No Operation Do nothing. 1.14 ps_1_1 ------------ 1.14.1 TEXKILL - Conditional Discard Alias for KIL.