summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno/ir3/ir3.h
diff options
context:
space:
mode:
authorRob Clark <[email protected]>2015-03-29 11:18:29 -0400
committerRob Clark <[email protected]>2015-04-05 12:44:01 -0400
commit104713d9f2dced94a427004a25c54b2c7feee166 (patch)
treea5aa80e953c25828ab376b009027bb4106661ba8 /src/gallium/drivers/freedreno/ir3/ir3.h
parent203f37540a698a812f0a66e2f3f1fff954af22ab (diff)
freedreno/ir3: split float/int abs/neg
Even though in the end, they map to the same bits, the backend will need to be able to differentiate float abs/neg vs integer abs/neg. Rather than making the backend figure it out based on instruction opcode (which when combined with mov/absneg instructions, can be awkward), just split out different flags for each so the frontend can signal it's intentions more clearly. Also, since (neg) for bitwise op's is actually a bitwise- not, split it out into bnot flag. Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/ir3/ir3.h')
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3.h161
1 files changed, 153 insertions, 8 deletions
diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h
index ce14807cb01..8310286102e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -57,20 +57,31 @@ struct ir3_register {
IR3_REG_HALF = 0x004,
IR3_REG_RELATIV= 0x008,
IR3_REG_R = 0x010,
- IR3_REG_NEGATE = 0x020,
- IR3_REG_ABS = 0x040,
- IR3_REG_EVEN = 0x080,
- IR3_REG_POS_INF= 0x100,
+ /* Most instructions, it seems, can do float abs/neg but not
+ * integer. The CP pass needs to know what is intended (int or
+ * float) in order to do the right thing. For this reason the
+ * abs/neg flags are split out into float and int variants. In
+ * addition, .b (bitwise) operations, the negate is actually a
+ * bitwise not, so split that out into a new flag to make it
+ * more clear.
+ */
+ IR3_REG_FNEG = 0x020,
+ IR3_REG_FABS = 0x040,
+ IR3_REG_SNEG = 0x080,
+ IR3_REG_SABS = 0x100,
+ IR3_REG_BNOT = 0x200,
+ IR3_REG_EVEN = 0x400,
+ IR3_REG_POS_INF= 0x800,
/* (ei) flag, end-input? Set on last bary, presumably to signal
* that the shader needs no more input:
*/
- IR3_REG_EI = 0x200,
+ IR3_REG_EI = 0x1000,
/* meta-flags, for intermediate stages of IR, ie.
* before register assignment is done:
*/
- IR3_REG_SSA = 0x1000, /* 'instr' is ptr to assigning instr */
- IR3_REG_IA = 0x2000, /* meta-input dst is "assigned" */
- IR3_REG_ADDR = 0x4000, /* register is a0.x */
+ IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */
+ IR3_REG_IA = 0x4000, /* meta-input dst is "assigned" */
+ IR3_REG_ADDR = 0x8000, /* register is a0.x */
} flags;
union {
/* normal registers:
@@ -523,6 +534,140 @@ static inline bool reg_gpr(struct ir3_register *r)
return true;
}
+/* some cat2 instructions (ie. those which are not float can embed an
+ * immediate:
+ */
+static inline bool ir3_cat2_immed(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_U:
+ case OPC_ADD_S:
+ case OPC_SUB_U:
+ case OPC_SUB_S:
+ case OPC_CMPS_U:
+ case OPC_CMPS_S:
+ case OPC_MIN_U:
+ case OPC_MIN_S:
+ case OPC_MAX_U:
+ case OPC_MAX_S:
+ case OPC_CMPV_U:
+ case OPC_CMPV_S:
+ case OPC_MUL_U:
+ case OPC_MUL_S:
+ case OPC_MULL_U:
+ case OPC_CLZ_S:
+ case OPC_ABSNEG_S:
+ case OPC_AND_B:
+ case OPC_OR_B:
+ case OPC_NOT_B:
+ case OPC_XOR_B:
+ case OPC_BFREV_B:
+ case OPC_CLZ_B:
+ case OPC_SHL_B:
+ case OPC_SHR_B:
+ case OPC_ASHR_B:
+ case OPC_MGEN_B:
+ case OPC_GETBIT_B:
+ case OPC_CBITS_B:
+ case OPC_BARY_F:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+
+/* map cat2 instruction to valid abs/neg flags: */
+static inline unsigned ir3_cat2_absneg(opc_t opc)
+{
+ switch (opc) {
+ case OPC_ADD_F:
+ case OPC_MIN_F:
+ case OPC_MAX_F:
+ case OPC_MUL_F:
+ case OPC_SIGN_F:
+ case OPC_CMPS_F:
+ case OPC_ABSNEG_F:
+ case OPC_CMPV_F:
+ case OPC_FLOOR_F:
+ case OPC_CEIL_F:
+ case OPC_RNDNE_F:
+ case OPC_RNDAZ_F:
+ case OPC_TRUNC_F:
+ case OPC_BARY_F:
+ return IR3_REG_FABS | IR3_REG_FNEG;
+
+ case OPC_ADD_U:
+ case OPC_ADD_S:
+ case OPC_SUB_U:
+ case OPC_SUB_S:
+ case OPC_CMPS_U:
+ case OPC_CMPS_S:
+ case OPC_MIN_U:
+ case OPC_MIN_S:
+ case OPC_MAX_U:
+ case OPC_MAX_S:
+ case OPC_CMPV_U:
+ case OPC_CMPV_S:
+ case OPC_MUL_U:
+ case OPC_MUL_S:
+ case OPC_MULL_U:
+ case OPC_CLZ_S:
+ return 0;
+
+ case OPC_ABSNEG_S:
+ return IR3_REG_SABS | IR3_REG_SNEG;
+
+ case OPC_AND_B:
+ case OPC_OR_B:
+ case OPC_NOT_B:
+ case OPC_XOR_B:
+ case OPC_BFREV_B:
+ case OPC_CLZ_B:
+ case OPC_SHL_B:
+ case OPC_SHR_B:
+ case OPC_ASHR_B:
+ case OPC_MGEN_B:
+ case OPC_GETBIT_B:
+ case OPC_CBITS_B:
+ return IR3_REG_BNOT;
+
+ default:
+ return 0;
+ }
+}
+
+/* map cat3 instructions to valid abs/neg flags: */
+static inline unsigned ir3_cat3_absneg(opc_t opc)
+{
+ switch (opc) {
+ case OPC_MAD_F16:
+ case OPC_MAD_F32:
+ case OPC_SEL_F16:
+ case OPC_SEL_F32:
+ return IR3_REG_FNEG;
+
+ case OPC_MAD_U16:
+ case OPC_MADSH_U16:
+ case OPC_MAD_S16:
+ case OPC_MADSH_M16:
+ case OPC_MAD_U24:
+ case OPC_MAD_S24:
+ case OPC_SEL_S16:
+ case OPC_SEL_S32:
+ case OPC_SAD_S16:
+ case OPC_SAD_S32:
+ /* neg *may* work on 3rd src.. */
+
+ case OPC_SEL_B16:
+ case OPC_SEL_B32:
+
+ default:
+ return 0;
+ }
+}
+
#define array_insert(arr, val) do { \
if (arr ## _count == arr ## _sz) { \
arr ## _sz = MAX2(2 * arr ## _sz, 16); \