diff options
author | Connor Abbott <[email protected]> | 2019-12-16 17:17:38 +0100 |
---|---|---|
committer | Connor Abbott <[email protected]> | 2019-12-18 23:08:55 +0100 |
commit | cfa1fb895ac5a752772f4d0748c1c2bce0c2e653 (patch) | |
tree | eeb9ea0a9c07521abebd6fe875f9f2416f1562fb /src/freedreno | |
parent | a9a3108be774aea620fa4fc726c33100d9a49add (diff) |
a6xx: Add more CP packets
And add fields uncovered by looking at the firmware. I think this covers
all the memory, register, and scratch manipulation opcodes that exist on
A6xx, plus one additional nice find for Vulkan, a description of a
previously unknown opcode, and documentation for CP_WAIT_REG_MEM.
Note that the bits for the CP_REG_TO_MEM count, as well as the formula
for computing the actual count for both CP_REG_TO_MEM and CP_MEM_TO_REG,
are changed because the A630 SQE firmware actually does something
different. I haven't investigated older microcodes to see whether this
extends back to A5xx and A4xx, but the only non-A6xx uses of this
field result in the same bit-pattern when using the A6xx bit range and
formula, so it should be safe to change the definition universally.
Reviewed-by: Kristian H. Kristensen <[email protected]>
Reviewed-by: Rob Clark <[email protected]>
Reviewed-by: Eric Anholt <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3116>
Diffstat (limited to 'src/freedreno')
-rw-r--r-- | src/freedreno/registers/adreno_pm4.xml | 291 | ||||
-rw-r--r-- | src/freedreno/vulkan/tu_cmd_buffer.c | 10 |
2 files changed, 282 insertions, 19 deletions
diff --git a/src/freedreno/registers/adreno_pm4.xml b/src/freedreno/registers/adreno_pm4.xml index 3a7865b489d..533dcf0fb38 100644 --- a/src/freedreno/registers/adreno_pm4.xml +++ b/src/freedreno/registers/adreno_pm4.xml @@ -219,7 +219,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> <doc>load sequencer instruction memory (code embedded in packet)</doc> <value name="CP_IM_LOAD_IMMEDIATE" value="0x2b"/> <doc>load constants from a location in memory</doc> - <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e"/> + <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e" variants="A2XX"/> <doc>selective invalidation of state pointers</doc> <value name="CP_INVALIDATE_STATE" value="0x3b"/> <doc>dynamically changes shader instruction memory partition</doc> @@ -266,7 +266,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> <doc>Load a buffer with pre-fetch enabled</doc> <value name="CP_INDIRECT_BUFFER_PFE" value="0x3f" variants="A5XX"/> <doc>Set bin (?)</doc> - <value name="CP_SET_BIN" value="0x4c"/> + <value name="CP_SET_BIN" value="0x4c" variants="A2XX"/> <doc>test 2 memory locations to dword values specified</doc> <value name="CP_TEST_TWO_MEMS" value="0x71"/> @@ -310,7 +310,7 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> for A4xx Write to register with address that does not fit into type-0 pkt </doc> - <value name="CP_WIDE_REG_WRITE" value="0x74"/> + <value name="CP_WIDE_REG_WRITE" value="0x74" variants="A4XX"/> <doc>copy from ME scratch RAM to a register</doc> <value name="CP_SCRATCH_TO_REG" value="0x4d"/> @@ -413,6 +413,15 @@ xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd"> <!-- jmptable entry used to handle type4 packet on a5xx+: --> <value name="PKT4" value="0x04" variants="A5XX,A6XX"/> + + <!-- TODO do these exist on A5xx? 
--> + <value name="CP_SCRATCH_WRITE" value="0x4c" variants="A6XX"/> + <value name="CP_REG_TO_MEM_OFFSET_MEM" value="0x74" variants="A6XX"/> + <value name="CP_REG_TO_MEM_OFFSET_REG" value="0x72" variants="A6XX"/> + <value name="CP_WAIT_MEM_GTE" value="0x14" variants="A6XX"/> + <value name="CP_WAIT_TWO_REGS" value="0x70" variants="A6XX"/> + <value name="CP_MEMCPY" value="0x75" variants="A6XX"/> + <value name="CP_SET_BIN_DATA5_OFFSET" value="0x2e" variants="A6XX"/> <!-- unknown a6xx opcodes: @@ -420,7 +429,6 @@ opcode: (null) (14) (5 dwords) opcode: (null) (55) (4 dwords) opcode: (null) (6d) (4 dwords) --> - <value name="CP_UNK_A6XX_14" value="0x14" variants="A6XX"/> <value name="CP_UNK_A6XX_55" value="0x55" variants="A6XX"/> <!-- @@ -830,14 +838,66 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> +<domain name="CP_SET_BIN_DATA5_OFFSET" width="32"> + <doc> + Like CP_SET_BIN_DATA5, but set the pointers as offsets from the + pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful + for Vulkan where these values aren't known when the command + stream is recorded. 
+ </doc> + <reg32 offset="0" name="0"> + <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> + <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> + <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> + <bitfield name="VSC_N" low="22" high="26" type="uint"/> + </reg32> + <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> + <reg32 offset="1" name="1"> + <bitfield name="BIN_DATA_OFFSET" low="0" high="31" type="uint"/> + </reg32> + <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> + <reg32 offset="2" name="2"> + <bitfield name="BIN_SIZE_OFFSET" low="0" high="31" type="uint"/> + </reg32> + <!-- BIN_DATA2_ADDR -> VSC_PIPE[p].DATA2_ADDRESS --> + <reg32 offset="3" name="3"> + <bitfield name="BIN_DATA2_OFFSET" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_REG_RMW" width="32"> + <doc> + Modifies DST_REG using two sources that can either be registers + or immediates. If SRC1_ADD is set, then do the following: + + $dst = (($dst & $src0) rot $rotate) + $src1 + + Otherwise: + + $dst = (($dst & $src0) rot $rotate) | $src1 + + Here "rot" means rotate left. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="DST_REG" low="0" high="17" type="hex"/> + <bitfield name="ROTATE" low="24" high="28" type="uint"/> + <bitfield name="SRC1_ADD" pos="29" type="boolean"/> + <bitfield name="SRC1_IS_REG" pos="30" type="boolean"/> + <bitfield name="SRC0_IS_REG" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="SRC0" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="SRC1" low="0" high="31" type="uint"/> + </reg32> +</domain> + <domain name="CP_REG_TO_MEM" width="32"> <reg32 offset="0" name="0"> <bitfield name="REG" low="0" high="15" type="hex"/> - <!-- - number of regsiters/dwords copied is CNT+1.. unsure - about # of bits - --> - <bitfield name="CNT" low="19" high="29" type="uint"/> + <!-- number of registers/dwords copied is max(CNT, 1). 
--> + <bitfield name="CNT" low="18" high="29" type="uint"/> <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> </reg32> @@ -849,13 +909,62 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> +<domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> + <doc> + Like CP_REG_TO_MEM, but the memory address to write to can be + offsetted using either one or two registers or scratch + registers. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="15" type="hex"/> + <!-- number of registers/dwords copied is max(CNT, 1). --> + <bitfield name="CNT" low="18" high="29" type="uint"/> + <bitfield name="64B" pos="30" type="boolean"/> + <bitfield name="ACCUMULATE" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="DEST" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2" variants="A5XX-"> + <bitfield name="DEST_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="OFFSET0" low="0" high="17" type="hex"/> + <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> + </reg32> + <!-- followed by an optional identical OFFSET1 dword --> +</domain> + +<domain name="CP_REG_TO_MEM_OFFSET_MEM" width="32"> + <doc> + Like CP_REG_TO_MEM, but the memory address to write to can be + offsetted using a DWORD in memory. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="15" type="hex"/> + <!-- number of registers/dwords copied is max(CNT, 1). 
--> + <bitfield name="CNT" low="18" high="29" type="uint"/> + <bitfield name="64B" pos="30" type="boolean"/> + <bitfield name="ACCUMULATE" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="DEST" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2" variants="A5XX-"> + <bitfield name="DEST_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> + </reg32> +</domain> + <domain name="CP_MEM_TO_REG" width="32"> <reg32 offset="0" name="0"> <bitfield name="REG" low="0" high="15" type="hex"/> - <!-- - number of regsiters/dwords copied is CNT+1.. unsure - about # of bits - --> + <!-- number of registers/dwords copied is max(CNT, 1). --> <bitfield name="CNT" low="19" high="29" type="uint"/> <bitfield name="64B" pos="30" type="boolean"/> <bitfield name="ACCUMULATE" pos="31" type="boolean"/> @@ -880,6 +989,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <!-- if set treat src/dst as 64bit values --> <bitfield name="DOUBLE" pos="29" type="boolean"/> + <!-- execute CP_WAIT_FOR_MEM_WRITES beforehand --> + <bitfield name="WAIT_FOR_MEM_WRITES" pos="30" type="boolean"/> + <!-- some other kind of wait --> + <bitfield name="UNK31" pos="31" type="boolean"/> </reg32> <!-- followed by sequence of addresses.. 
the first is the @@ -891,6 +1004,61 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) --> </domain> +<domain name="CP_MEMCPY" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="DWORDS" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="SRC_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="SRC_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="DST_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="DST_HI" low="0" high="31" type="hex"/> + </reg32> +</domain> + +<domain name="CP_REG_TO_SCRATCH" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <bitfield name="SCRATCH" low="20" high="22" type="uint"/> + <!-- number of registers/dwords copied is CNT + 1. --> + <bitfield name="CNT" low="24" high="26" type="uint"/> + </reg32> +</domain> + +<domain name="CP_SCRATCH_TO_REG" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <!-- note: CP_MEM_TO_REG always sets this when writing to the register --> + <bitfield name="UNK18" pos="18" type="boolean"/> + <bitfield name="SCRATCH" low="20" high="22" type="uint"/> + <!-- number of registers/dwords copied is CNT + 1. 
--> + <bitfield name="CNT" low="24" high="26" type="uint"/> + </reg32> +</domain> + +<domain name="CP_SCRATCH_WRITE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="SCRATCH" low="20" high="22" type="uint"/> + </reg32> + <!-- followed by one or more DWORDs to write to scratch registers --> +</domain> + +<domain name="CP_MEM_WRITE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="ADDR_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_HI" low="0" high="31"/> + </reg32> + <!-- followed by the DWORDs to write --> +</domain> + <enum name="cp_cond_function"> <value value="0" name="WRITE_ALWAYS"/> <value value="1" name="WRITE_LT"/> @@ -927,7 +1095,10 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <domain name="CP_COND_WRITE5" width="32"> <reg32 offset="0" name="0"> <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> + <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> + <!-- if both POLL_MEMORY and POLL_SCRATCH are false, it polls a register at POLL_ADDR_LO instead. --> <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> + <bitfield name="POLL_SCRATCH" pos="5" type="boolean"/> <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> </reg32> <reg32 offset="1" name="1"> @@ -953,6 +1124,71 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> +<domain name="CP_WAIT_MEM_GTE" width="32"> + <doc> + Wait until a memory value is greater than or equal to the + reference, using signed comparison. + </doc> + <reg32 offset="0" name="0"> + <!-- Reserved for flags, presumably? 
Unused in FW --> + <bitfield name="RESERVED" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="REF" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_WAIT_REG_MEM" width="32"> + <doc> + This uses the same internal comparison as CP_COND_WRITE, + but waits until the comparison is true instead. It busy-loops in + the CP for the given number of cycles before trying again. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> + <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> + <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> + <bitfield name="POLL_SCRATCH" pos="5" type="boolean"/> + <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="REF" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="MASK" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="DELAY_LOOP_CYCLES" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_WAIT_TWO_REGS" width="32"> + <doc> + Waits for REG0 to not be 0 or REG1 to not equal REF + </doc> + <reg32 offset="0" name="0"> + <bitfield name="REG0" low="0" high="17" type="hex"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="REG1" low="0" high="17" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="REF" low="0" high="31" type="uint"/> + </reg32> +</domain> + <domain name="CP_DISPATCH_COMPUTE" width="32"> <reg32 offset="0" name="0"/> <reg32 offset="1" name="1"> @@ 
-1201,7 +1437,8 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) <bitfield name="REG" low="0" high="11"/> <!-- the bit to test --> <bitfield name="BIT" low="20" high="24" type="uint"/> - <bitfield name="UNK25" pos="25" type="boolean"/> + <!-- execute CP_WAIT_FOR_ME beforehand --> + <bitfield name="WAIT_FOR_ME" pos="25" type="boolean"/> </reg32> </domain> @@ -1215,5 +1452,31 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) </reg32> </domain> +<domain name="CP_COND_EXEC" width="32"> + <doc> + Executes the following DWORDs of commands if the dword at ADDR0 + is not equal to 0 and the dword at ADDR1 is less than REF + (signed comparison). + </doc> + <reg32 offset="0" name="0"> + <bitfield name="ADDR0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR0_HI" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR1_LO" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="ADDR1_HI" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="REF" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="DWORDS" low="0" high="31" type="uint"/> + </reg32> +</domain> + </database> diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 49ea11acfaa..caa1a54af14 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -738,7 +738,7 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt7(cs, CP_REG_TEST, 1); tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) | A6XX_CP_REG_TEST_0_BIT(0) | - A6XX_CP_REG_TEST_0_UNK25); + A6XX_CP_REG_TEST_0_WAIT_FOR_ME); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, 0x10000000); @@ -1124,7 +1124,7 @@ tu6_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs) seqno = tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true); - tu_cs_emit_pkt7(cs, CP_UNK_A6XX_14, 4); + tu_cs_emit_pkt7(cs, CP_WAIT_MEM_GTE, 4); tu_cs_emit(cs, 0x00000000); 
tu_cs_emit_qw(cs, cmd->scratch_bo.iova); tu_cs_emit(cs, seqno); @@ -1217,7 +1217,7 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu_cs_emit_pkt7(cs, CP_REG_TEST, 1); tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) | A6XX_CP_REG_TEST_0_BIT(0) | - A6XX_CP_REG_TEST_0_UNK25); + A6XX_CP_REG_TEST_0_WAIT_FOR_ME); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, 0x10000000); @@ -1231,7 +1231,7 @@ emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs) */ tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3); tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) | - CP_REG_TO_MEM_0_CNT(1 - 1)); + CP_REG_TO_MEM_0_CNT(0)); tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_OVERFLOW); tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1); @@ -1401,7 +1401,7 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, tu_cs_emit_pkt7(cs, CP_REG_TEST, 1); tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) | A6XX_CP_REG_TEST_0_BIT(0) | - A6XX_CP_REG_TEST_0_UNK25); + A6XX_CP_REG_TEST_0_WAIT_FOR_ME); tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2); tu_cs_emit(cs, 0x10000000); |