summaryrefslogtreecommitdiffstats
path: root/src/gallium/docs/source/tgsi.rst
diff options
context:
space:
mode:
authorKeith Whitwell <[email protected]>2009-12-21 23:25:15 +0000
committerKeith Whitwell <[email protected]>2009-12-21 23:25:15 +0000
commita62aaa739924208f9469a75c43a407c0d72a427e (patch)
treecf9eb09c9c73dd6493b874f8776d4d70d108d86d /src/gallium/docs/source/tgsi.rst
parentc083fa9bba81cd7ec44f34ac613269781dd4c3bb (diff)
docs: pull in tgsi-instruction-set.txt verbatim
Diffstat (limited to 'src/gallium/docs/source/tgsi.rst')
-rw-r--r--src/gallium/docs/source/tgsi.rst1164
1 files changed, 1164 insertions, 0 deletions
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 2474925b464..30717bdf7b9 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -5,3 +5,1167 @@ TGSI, Tungsten Graphics Shader Instructions, is an intermediate language
for describing shaders. Since Gallium is inherently shaderful, shaders are
an important part of the API. TGSI is the only intermediate representation
used by all drivers.
+
+
+TGSI Instruction Specification
+==============================
+
+
+1 Instruction Set Operations
+=============================
+
+
+1.1 GL_NV_vertex_program
+-------------------------
+
+
+1.1.1 ARL - Address Register Load
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+ dst.w = floor(src.w)
+
+
+1.1.2 MOV - Move
+
+ dst.x = src.x
+ dst.y = src.y
+ dst.z = src.z
+ dst.w = src.w
+
+
+1.1.3 LIT - Light Coefficients
+
+ dst.x = 1.0
+ dst.y = max(src.x, 0.0)
+ dst.z = (src.x > 0.0) ? pow(max(src.y, 0.0), clamp(src.w, -128.0, 128.0)) : 0.0
+ dst.w = 1.0
+
+
+1.1.4 RCP - Reciprocal
+
+ dst.x = 1.0 / src.x
+ dst.y = 1.0 / src.x
+ dst.z = 1.0 / src.x
+ dst.w = 1.0 / src.x
+
+
+1.1.5 RSQ - Reciprocal Square Root
+
+ dst.x = 1.0 / sqrt(abs(src.x))
+ dst.y = 1.0 / sqrt(abs(src.x))
+ dst.z = 1.0 / sqrt(abs(src.x))
+ dst.w = 1.0 / sqrt(abs(src.x))
+
+
+1.1.6 EXP - Approximate Exponential Base 2
+
+ dst.x = pow(2.0, floor(src.x))
+ dst.y = src.x - floor(src.x)
+ dst.z = pow(2.0, src.x)
+ dst.w = 1.0
+
+
+1.1.7 LOG - Approximate Logarithm Base 2
+
+ dst.x = floor(lg2(abs(src.x)))
+ dst.y = abs(src.x) / pow(2.0, floor(lg2(abs(src.x))))
+ dst.z = lg2(abs(src.x))
+ dst.w = 1.0
+
+
+1.1.8 MUL - Multiply
+
+ dst.x = src0.x * src1.x
+ dst.y = src0.y * src1.y
+ dst.z = src0.z * src1.z
+ dst.w = src0.w * src1.w
+
+
+1.1.9 ADD - Add
+
+ dst.x = src0.x + src1.x
+ dst.y = src0.y + src1.y
+ dst.z = src0.z + src1.z
+ dst.w = src0.w + src1.w
+
+
+1.1.10 DP3 - 3-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z
+
+
+1.1.11 DP4 - 4-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src0.w * src1.w
+
+
+1.1.12 DST - Distance Vector
+
+ dst.x = 1.0
+ dst.y = src0.y * src1.y
+ dst.z = src0.z
+ dst.w = src1.w
+
+
+1.1.13 MIN - Minimum
+
+ dst.x = min(src0.x, src1.x)
+ dst.y = min(src0.y, src1.y)
+ dst.z = min(src0.z, src1.z)
+ dst.w = min(src0.w, src1.w)
+
+
+1.1.14 MAX - Maximum
+
+ dst.x = max(src0.x, src1.x)
+ dst.y = max(src0.y, src1.y)
+ dst.z = max(src0.z, src1.z)
+ dst.w = max(src0.w, src1.w)
+
+
+1.1.15 SLT - Set On Less Than
+
+ dst.x = (src0.x < src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y < src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z < src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w < src1.w) ? 1.0 : 0.0
+
+
+1.1.16 SGE - Set On Greater Equal Than
+
+ dst.x = (src0.x >= src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y >= src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z >= src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w >= src1.w) ? 1.0 : 0.0
+
+
+1.1.17 MAD - Multiply And Add
+
+ dst.x = src0.x * src1.x + src2.x
+ dst.y = src0.y * src1.y + src2.y
+ dst.z = src0.z * src1.z + src2.z
+ dst.w = src0.w * src1.w + src2.w
+
+
+1.2 GL_ATI_fragment_shader
+---------------------------
+
+
+1.2.1 SUB - Subtract
+
+ dst.x = src0.x - src1.x
+ dst.y = src0.y - src1.y
+ dst.z = src0.z - src1.z
+ dst.w = src0.w - src1.w
+
+
+1.2.2 DOT3 - 3-component Dot Product
+
+ Alias for DP3.
+
+
+1.2.3 DOT4 - 4-component Dot Product
+
+ Alias for DP4.
+
+
+1.2.4 LERP - Linear Interpolate
+
+ dst.x = src0.x * (src1.x - src2.x) + src2.x
+ dst.y = src0.y * (src1.y - src2.y) + src2.y
+ dst.z = src0.z * (src1.z - src2.z) + src2.z
+ dst.w = src0.w * (src1.w - src2.w) + src2.w
+
+
+1.2.5 CND - Condition
+
+ dst.x = (src2.x > 0.5) ? src0.x : src1.x
+ dst.y = (src2.y > 0.5) ? src0.y : src1.y
+ dst.z = (src2.z > 0.5) ? src0.z : src1.z
+ dst.w = (src2.w > 0.5) ? src0.w : src1.w
+
+
+1.2.6 CND0 - Condition Zero
+
+ Removed. Use (CMP src2, src1, src0) instead.
+
+1.2.7 DOT2ADD - 2-component Dot Product And Add
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.y = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.z = src0.x * src1.x + src0.y * src1.y + src2.x
+ dst.w = src0.x * src1.x + src0.y * src1.y + src2.x
+
+
+1.3 GL_EXT_vertex_shader
+-------------------------
+
+
+1.3.1 INDEX - Array Lookup
+
+ Considered for removal from language.
+
+
+1.3.2 NEGATE - Negate
+
+ Considered for removal from language.
+
+
+1.3.3 MADD - Multiply And Add
+
+ Alias for MAD.
+
+
+1.3.4 FRAC - Fraction
+
+ dst.x = src.x - floor(src.x)
+ dst.y = src.y - floor(src.y)
+ dst.z = src.z - floor(src.z)
+ dst.w = src.w - floor(src.w)
+
+
+1.3.5 SETGE - Set On Greater Equal
+
+ Alias for SGE.
+
+
+1.3.6 SETLT - Set On Less Than
+
+ Alias for SLT.
+
+
+1.3.7 CLAMP - Clamp
+
+ dst.x = clamp(src0.x, src1.x, src2.x)
+ dst.y = clamp(src0.y, src1.y, src2.y)
+ dst.z = clamp(src0.z, src1.z, src2.z)
+ dst.w = clamp(src0.w, src1.w, src2.w)
+
+
+1.3.8 FLOOR - Floor
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+ dst.w = floor(src.w)
+
+
+1.3.9 ROUND - Round
+
+ dst.x = round(src.x)
+ dst.y = round(src.y)
+ dst.z = round(src.z)
+ dst.w = round(src.w)
+
+
+1.3.10 EXPBASE2 - Exponential Base 2
+
+ dst.x = pow(2.0, src.x)
+ dst.y = pow(2.0, src.x)
+ dst.z = pow(2.0, src.x)
+ dst.w = pow(2.0, src.x)
+
+
+1.3.11 LOGBASE2 - Logarithm Base 2
+
+ dst.x = lg2(src.x)
+ dst.y = lg2(src.x)
+ dst.z = lg2(src.x)
+ dst.w = lg2(src.x)
+
+
+1.3.12 POWER - Power
+
+ dst.x = pow(src0.x, src1.x)
+ dst.y = pow(src0.x, src1.x)
+ dst.z = pow(src0.x, src1.x)
+ dst.w = pow(src0.x, src1.x)
+
+
+1.3.13 RECIP - Reciprocal
+
+ Alias for RCP.
+
+
+1.3.14 RECIPSQRT - Reciprocal Square Root
+
+ Alias for RSQ.
+
+
+1.3.15 CROSSPRODUCT - Cross Product
+
+ dst.x = src0.y * src1.z - src1.y * src0.z
+ dst.y = src0.z * src1.x - src1.z * src0.x
+ dst.z = src0.x * src1.y - src1.x * src0.y
+ dst.w = 1.0
+
+
+1.3.16 MULTIPLYMATRIX - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.4 GL_NV_vertex_program1_1
+----------------------------
+
+
+1.4.1 ABS - Absolute
+
+ dst.x = abs(src.x)
+ dst.y = abs(src.y)
+ dst.z = abs(src.z)
+ dst.w = abs(src.w)
+
+
+1.4.2 RCC - Reciprocal Clamped
+
+ dst.x = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.y = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.z = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+ dst.w = (1.0 / src.x) > 0.0 ? clamp(1.0 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1.0 / src.x, -1.884467e+019, -5.42101e-020)
+
+
+1.4.3 DPH - Homogeneous Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.y = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.z = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+ dst.w = src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w
+
+
+1.5 GL_NV_fragment_program
+---------------------------
+
+
+1.5.1 COS - Cosine
+
+ dst.x = cos(src.x)
+ dst.y = cos(src.x)
+ dst.z = cos(src.x)
+ dst.w = cos(src.x)
+
+
+1.5.2 DDX - Derivative Relative To X
+
+ dst.x = partialx(src.x)
+ dst.y = partialx(src.y)
+ dst.z = partialx(src.z)
+ dst.w = partialx(src.w)
+
+
+1.5.3 DDY - Derivative Relative To Y
+
+ dst.x = partialy(src.x)
+ dst.y = partialy(src.y)
+ dst.z = partialy(src.z)
+ dst.w = partialy(src.w)
+
+
+1.5.4 EX2 - Exponential Base 2
+
+ Alias for EXPBASE2.
+
+
+1.5.5 FLR - Floor
+
+ Alias for FLOOR.
+
+
+1.5.6 FRC - Fraction
+
+ Alias for FRAC.
+
+
+1.5.7 KILP - Predicated Discard
+
+ discard
+
+
+1.5.8 LG2 - Logarithm Base 2
+
+ Alias for LOGBASE2.
+
+
+1.5.9 LRP - Linear Interpolate
+
+ Alias for LERP.
+
+
+1.5.10 PK2H - Pack Two 16-bit Floats
+
+ TBD
+
+
+1.5.11 PK2US - Pack Two Unsigned 16-bit Scalars
+
+ TBD
+
+
+1.5.12 PK4B - Pack Four Signed 8-bit Scalars
+
+ TBD
+
+
+1.5.13 PK4UB - Pack Four Unsigned 8-bit Scalars
+
+ TBD
+
+
+1.5.14 POW - Power
+
+ Alias for POWER.
+
+
+1.5.15 RFL - Reflection Vector
+
+ dst.x = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.x - src1.x
+ dst.y = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.y - src1.y
+ dst.z = 2.0 * (src0.x * src1.x + src0.y * src1.y + src0.z * src1.z) / (src0.x * src0.x + src0.y * src0.y + src0.z * src0.z) * src0.z - src1.z
+ dst.w = 1.0
+
+
+1.5.16 SEQ - Set On Equal
+
+ dst.x = (src0.x == src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y == src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z == src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w == src1.w) ? 1.0 : 0.0
+
+
+1.5.17 SFL - Set On False
+
+ dst.x = 0.0
+ dst.y = 0.0
+ dst.z = 0.0
+ dst.w = 0.0
+
+
+1.5.18 SGT - Set On Greater Than
+
+ dst.x = (src0.x > src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y > src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z > src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w > src1.w) ? 1.0 : 0.0
+
+
+1.5.19 SIN - Sine
+
+ dst.x = sin(src.x)
+ dst.y = sin(src.x)
+ dst.z = sin(src.x)
+ dst.w = sin(src.x)
+
+
+1.5.20 SLE - Set On Less Equal Than
+
+ dst.x = (src0.x <= src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y <= src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z <= src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w <= src1.w) ? 1.0 : 0.0
+
+
+1.5.21 SNE - Set On Not Equal
+
+ dst.x = (src0.x != src1.x) ? 1.0 : 0.0
+ dst.y = (src0.y != src1.y) ? 1.0 : 0.0
+ dst.z = (src0.z != src1.z) ? 1.0 : 0.0
+ dst.w = (src0.w != src1.w) ? 1.0 : 0.0
+
+
+1.5.22 STR - Set On True
+
+ dst.x = 1.0
+ dst.y = 1.0
+ dst.z = 1.0
+ dst.w = 1.0
+
+
+1.5.23 TEX - Texture Lookup
+
+ TBD
+
+
+1.5.24 TXD - Texture Lookup with Derivatives
+
+ TBD
+
+
+1.5.25 TXP - Projective Texture Lookup
+
+ TBD
+
+
+1.5.26 UP2H - Unpack Two 16-Bit Floats
+
+ TBD
+
+
+1.5.27 UP2US - Unpack Two Unsigned 16-Bit Scalars
+
+ TBD
+
+
+1.5.28 UP4B - Unpack Four Signed 8-Bit Values
+
+ TBD
+
+
+1.5.29 UP4UB - Unpack Four Unsigned 8-Bit Scalars
+
+ TBD
+
+
+1.5.30 X2D - 2D Coordinate Transformation
+
+ dst.x = src0.x + src1.x * src2.x + src1.y * src2.y
+ dst.y = src0.y + src1.x * src2.z + src1.y * src2.w
+ dst.z = src0.x + src1.x * src2.x + src1.y * src2.y
+ dst.w = src0.y + src1.x * src2.z + src1.y * src2.w
+
+
+1.6 GL_NV_vertex_program2
+--------------------------
+
+
+1.6.1 ARA - Address Register Add
+
+ TBD
+
+
+1.6.2 ARR - Address Register Load With Round
+
+ dst.x = round(src.x)
+ dst.y = round(src.y)
+ dst.z = round(src.z)
+ dst.w = round(src.w)
+
+
+1.6.3 BRA - Branch
+
+ pc = target
+
+
+1.6.4 CAL - Subroutine Call
+
+ push(pc)
+ pc = target
+
+
+1.6.5 RET - Subroutine Call Return
+
+ pc = pop()
+
+
+1.6.6 SSG - Set Sign
+
+ dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
+ dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
+ dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
+ dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
+
+
+1.7 GL_ARB_vertex_program
+--------------------------
+
+
+1.7.1 SWZ - Extended Swizzle
+
+ dst.x = src.x
+ dst.y = src.y
+ dst.z = src.z
+ dst.w = src.w
+
+
+1.7.2 XPD - Cross Product
+
+ Alias for CROSSPRODUCT.
+
+
+1.8 GL_ARB_fragment_program
+----------------------------
+
+
+1.8.1 CMP - Compare
+
+ dst.x = (src0.x < 0.0) ? src1.x : src2.x
+ dst.y = (src0.y < 0.0) ? src1.y : src2.y
+ dst.z = (src0.z < 0.0) ? src1.z : src2.z
+ dst.w = (src0.w < 0.0) ? src1.w : src2.w
+
+
+1.8.2 KIL - Conditional Discard
+
+ if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0)
+ discard
+ endif
+
+
+1.8.3 SCS - Sine Cosine
+
+ dst.x = cos(src.x)
+ dst.y = sin(src.x)
+ dst.z = 0.0
+ dst.w = 1.0
+
+
+1.8.4 TXB - Texture Lookup With Bias
+
+ TBD
+
+
+1.9 GL_NV_fragment_program2
+----------------------------
+
+
+1.9.1 NRM - 3-component Vector Normalise
+
+ dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z)
+ dst.w = 1.0
+
+
+1.9.2 DIV - Divide
+
+ dst.x = src0.x / src1.x
+ dst.y = src0.y / src1.y
+ dst.z = src0.z / src1.z
+ dst.w = src0.w / src1.w
+
+
+1.9.3 DP2 - 2-component Dot Product
+
+ dst.x = src0.x * src1.x + src0.y * src1.y
+ dst.y = src0.x * src1.x + src0.y * src1.y
+ dst.z = src0.x * src1.x + src0.y * src1.y
+ dst.w = src0.x * src1.x + src0.y * src1.y
+
+
+1.9.4 DP2A - 2-component Dot Product And Add
+
+ Alias for DOT2ADD.
+
+
+1.9.5 TXL - Texture Lookup With LOD
+
+ TBD
+
+
+1.9.6 BRK - Break
+
+ TBD
+
+
+1.9.7 IF - If
+
+ TBD
+
+
+1.9.8 BGNFOR - Begin a For-Loop
+
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+
+ if (dst.y <= 0)
+ pc = [matching ENDFOR instruction] + 1
+ endif
+
+ Note: The destination must be a loop register.
+ The source must be a constant register.
+
+
+1.9.9 REP - Repeat
+
+ TBD
+
+
+1.9.10 ELSE - Else
+
+ TBD
+
+
+1.9.11 ENDIF - End If
+
+ TBD
+
+
+1.9.12 ENDFOR - End a For-Loop
+
+ dst.x = dst.x + dst.z
+ dst.y = dst.y - 1.0
+
+ if (dst.y > 0)
+ pc = [matching BGNFOR instruction] + 1
+ endif
+
+ Note: The destination must be a loop register.
+
+
+1.9.13 ENDREP - End Repeat
+
+ TBD
+
+
+1.10 GL_NV_vertex_program3
+---------------------------
+
+
+1.10.1 PUSHA - Push Address Register On Stack
+
+ push(src.x)
+ push(src.y)
+ push(src.z)
+ push(src.w)
+
+
+1.10.2 POPA - Pop Address Register From Stack
+
+ dst.w = pop()
+ dst.z = pop()
+ dst.y = pop()
+ dst.x = pop()
+
+
+1.11 GL_NV_gpu_program4
+------------------------
+
+
+1.11.1 CEIL - Ceiling
+
+ dst.x = ceil(src.x)
+ dst.y = ceil(src.y)
+ dst.z = ceil(src.z)
+ dst.w = ceil(src.w)
+
+
+1.11.2 I2F - Integer To Float
+
+ dst.x = (float) src.x
+ dst.y = (float) src.y
+ dst.z = (float) src.z
+ dst.w = (float) src.w
+
+
+1.11.3 NOT - Bitwise Not
+
+ dst.x = ~src.x
+ dst.y = ~src.y
+ dst.z = ~src.z
+ dst.w = ~src.w
+
+
+1.11.4 TRUNC - Truncate
+
+ dst.x = trunc(src.x)
+ dst.y = trunc(src.y)
+ dst.z = trunc(src.z)
+ dst.w = trunc(src.w)
+
+
+1.11.5 SHL - Shift Left
+
+ dst.x = src0.x << src1.x
+ dst.y = src0.y << src1.x
+ dst.z = src0.z << src1.x
+ dst.w = src0.w << src1.x
+
+
+1.11.6 SHR - Shift Right
+
+ dst.x = src0.x >> src1.x
+ dst.y = src0.y >> src1.x
+ dst.z = src0.z >> src1.x
+ dst.w = src0.w >> src1.x
+
+
+1.11.7 AND - Bitwise And
+
+ dst.x = src0.x & src1.x
+ dst.y = src0.y & src1.y
+ dst.z = src0.z & src1.z
+ dst.w = src0.w & src1.w
+
+
+1.11.8 OR - Bitwise Or
+
+ dst.x = src0.x | src1.x
+ dst.y = src0.y | src1.y
+ dst.z = src0.z | src1.z
+ dst.w = src0.w | src1.w
+
+
+1.11.9 MOD - Modulus
+
+ dst.x = src0.x % src1.x
+ dst.y = src0.y % src1.y
+ dst.z = src0.z % src1.z
+ dst.w = src0.w % src1.w
+
+
+1.11.10 XOR - Bitwise Xor
+
+ dst.x = src0.x ^ src1.x
+ dst.y = src0.y ^ src1.y
+ dst.z = src0.z ^ src1.z
+ dst.w = src0.w ^ src1.w
+
+
+1.11.11 SAD - Sum Of Absolute Differences
+
+ dst.x = abs(src0.x - src1.x) + src2.x
+ dst.y = abs(src0.y - src1.y) + src2.y
+ dst.z = abs(src0.z - src1.z) + src2.z
+ dst.w = abs(src0.w - src1.w) + src2.w
+
+
+1.11.12 TXF - Texel Fetch
+
+ TBD
+
+
+1.11.13 TXQ - Texture Size Query
+
+ TBD
+
+
+1.11.14 CONT - Continue
+
+ TBD
+
+
+1.12 GL_NV_geometry_program4
+-----------------------------
+
+
+1.12.1 EMIT - Emit
+
+ TBD
+
+
+1.12.2 ENDPRIM - End Primitive
+
+ TBD
+
+
+1.13 GLSL
+----------
+
+
+1.13.1 BGNLOOP - Begin a Loop
+
+ TBD
+
+
+1.13.2 BGNSUB - Begin Subroutine
+
+ TBD
+
+
+1.13.3 ENDLOOP - End a Loop
+
+ TBD
+
+
+1.13.4 ENDSUB - End Subroutine
+
+ TBD
+
+
+1.13.5 INT - Truncate
+
+ Alias for TRUNC.
+
+
+1.13.6 NOISE1 - 1D Noise
+
+ TBD
+
+
+1.13.7 NOISE2 - 2D Noise
+
+ TBD
+
+
+1.13.8 NOISE3 - 3D Noise
+
+ TBD
+
+
+1.13.9 NOISE4 - 4D Noise
+
+ TBD
+
+
+1.13.10 NOP - No Operation
+
+ Do nothing.
+
+
+1.14 ps_1_1
+------------
+
+
+1.14.1 TEXKILL - Conditional Discard
+
+ Alias for KIL.
+
+
+1.15 ps_1_4
+------------
+
+
+1.15.1 TEXLD - Texture Lookup
+
+ Alias for TEX.
+
+
+1.16 ps_2_0
+------------
+
+
+1.16.1 M4X4 - Multiply Matrix
+
+ Alias for MULTIPLYMATRIX.
+
+
+1.16.2 M4X3 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.3 M3X4 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.4 M3X3 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.5 M3X2 - Multiply Matrix
+
+ Considered for removal from language.
+
+
+1.16.6 CRS - Cross Product
+
+ Alias for XPD.
+
+
+1.16.7 NRM4 - 4-component Vector Normalise
+
+ dst.x = src.x / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.y = src.y / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.z = src.z / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+ dst.w = src.w / sqrt(src.x * src.x + src.y * src.y + src.z * src.z + src.w * src.w)
+
+
+1.16.8 SINCOS - Sine Cosine
+
+ Alias for SCS.
+
+
+1.16.9 TEXLDB - Texture Lookup With Bias
+
+ Alias for TXB.
+
+
+1.16.10 DP2ADD - 2-component Dot Product And Add
+
+ Alias for DP2A.
+
+
+1.17 ps_2_x
+------------
+
+
+1.17.1 CALL - Subroutine Call
+
+ Alias for CAL.
+
+
+1.17.2 CALLNZ - Subroutine Call If Not Zero
+
+ TBD
+
+
+1.17.3 IFC - If
+
+ TBD
+
+
+1.17.4 BREAK - Break
+
+ Alias for BRK.
+
+
+1.17.5 BREAKC - Break Conditional
+
+ TBD
+
+
+1.17.6 DSX - Derivative Relative To X
+
+ Alias for DDX.
+
+
+1.17.7 DSY - Derivative Relative To Y
+
+ Alias for DDY.
+
+
+1.17.8 TEXLDD - Texture Lookup with Derivatives
+
+ Alias for TXD.
+
+
+1.18 vs_1_1
+------------
+
+
+1.18.1 EXPP - Approximate Exponential Base 2
+
+ Use EXP. See also 1.19.3.
+
+
+1.18.2 LOGP - Logarithm Base 2
+
+ Use LOG. See also 1.19.4.
+
+
+1.19 vs_2_0
+------------
+
+
+1.19.1 SGN - Set Sign
+
+ Alias for SSG.
+
+
+1.19.2 MOVA - Move Address Register
+
+ Alias for ARR.
+
+
+1.19.3 EXPP - Approximate Exponential Base 2
+
+ Use EX2.
+
+
+1.19.4 LOGP - Logarithm Base 2
+
+ Use LG2.
+
+
+2 Explanation of symbols used
+==============================
+
+
+2.1 Functions
+--------------
+
+
+ abs(x) Absolute value of x.
+ '|x|'
+ (x < 0.0) ? -x : x
+
+ ceil(x) Ceiling of x.
+
+ clamp(x,y,z) Clamp x between y and z.
+ (x < y) ? y : (x > z) ? z : x
+
+ cos(x) Cosine of x.
+
+ floor(x) Floor of x.
+
+ lg2(x) Logarithm base 2 of x.
+
+ max(x,y) Maximum of x and y.
+ (x > y) ? x : y
+
+ min(x,y) Minimum of x and y.
+ (x < y) ? x : y
+
+ partialx(x) Derivative of x relative to fragment's X.
+
+ partialy(x) Derivative of x relative to fragment's Y.
+
+ pop() Pop from stack.
+
+ pow(x,y) Raise x to power of y.
+
+ push(x) Push x on stack.
+
+ round(x) Round x.
+
+ sin(x) Sine of x.
+
+ sqrt(x) Square root of x.
+
+ trunc(x) Truncate x.
+
+
+2.2 Keywords
+-------------
+
+
+ discard Discard fragment.
+
+ dst First destination register.
+
+ dst0 First destination register.
+
+ pc Program counter.
+
+ src First source register.
+
+ src0 First source register.
+
+ src1 Second source register.
+
+ src2 Third source register.
+
+ target Label of target instruction.
+
+
+3 Other tokens
+===============
+
+
+3.1 Declaration Semantic
+-------------------------
+
+
+ Follows Declaration token if Semantic bit is set.
+
+ Since its purpose is to link a shader with other stages of the pipeline,
+ it is valid to follow only those Declaration tokens that declare a register
+ either in INPUT or OUTPUT file.
+
+ SemanticName field contains the semantic name of the register being declared.
+ There is no default value.
+
+ SemanticIndex is an optional subscript that can be used to distinguish
+ different register declarations with the same semantic name. The default value
+ is 0.
+
+ The meanings of the individual semantic names are explained in the following
+ sections.
+
+
+3.1.1 FACE
+
+ Valid only in a fragment shader INPUT declaration.
+
+ FACE.x is negative when the primitive is back facing. FACE.x is positive
+ when the primitive is front facing.