diff options
author | Rob Clark <[email protected]> | 2016-11-08 10:50:03 -0500 |
---|---|---|
committer | Rob Clark <[email protected]> | 2016-11-30 12:35:49 -0500 |
commit | 946cf4eb6846767306a221eec7d0f82d20dfb6b5 (patch) | |
tree | bb35f2837a125ed66736c7ff96be954685891b09 /src/gallium/drivers/freedreno/freedreno_util.h | |
parent | fcba3046e1b90ecec6f5f4ad5397cd36e0c740ac (diff) |
freedreno/a5xx: initial support
Signed-off-by: Rob Clark <[email protected]>
Diffstat (limited to 'src/gallium/drivers/freedreno/freedreno_util.h')
-rw-r--r-- | src/gallium/drivers/freedreno/freedreno_util.h | 116 |
1 files changed, 111 insertions, 5 deletions
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 30097008e2a..a2d13589158 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -57,8 +57,9 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define A2XX_MAX_RENDER_TARGETS 1 #define A3XX_MAX_RENDER_TARGETS 4 #define A4XX_MAX_RENDER_TARGETS 8 +#define A5XX_MAX_RENDER_TARGETS 8 -#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS +#define MAX_RENDER_TARGETS A5XX_MAX_RENDER_TARGETS #define FD_DBG_MSGS 0x0001 #define FD_DBG_DISASM 0x0002 @@ -176,6 +177,7 @@ fd_half_precision(struct pipe_framebuffer_state *pfb) #define LOG_DWORDS 0 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); +static inline void emit_marker5(struct fd_ringbuffer *ring, int scratch_idx); static inline void OUT_RING(struct fd_ringbuffer *ring, uint32_t data) @@ -202,39 +204,45 @@ OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, })); } +/* + * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ + */ + static inline void OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + fd_ringbuffer_reloc2(ring, &(struct fd_reloc){ .bo = bo, .flags = FD_RELOC_READ, .offset = offset, .or = or, .shift = shift, + .orhi = or >> 32, }); } static inline void OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring, (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + fd_ringbuffer_reloc2(ring, &(struct fd_reloc){ .bo = bo, .flags = FD_RELOC_READ | FD_RELOC_WRITE, .offset = offset, .or = or, .shift = shift, + .orhi = or >> 32, }); } @@ -244,9 +252,18 @@ static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) fd_ringbuffer_grow(ring, ndwords); } +static inline uint32_t +__gpu_id(struct fd_ringbuffer *ring) +{ + uint64_t val; + fd_pipe_get_param(ring->pipe, FD_GPU_ID, &val); + return val; +} + static inline void OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -254,6 +271,7 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct fd_ringbuffer *ring) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } @@ -261,10 +279,48 @@ OUT_PKT2(struct fd_ringbuffer *ring) static inline void OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } +/* + * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 + */ + +static inline unsigned +_odd_parity_bit(unsigned val) +{ + /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel + * note that we want odd parity so 0x6996 is inverted. + */ + val ^= val >> 16; + val ^= val >> 8; + val ^= val >> 4; + val &= 0xf; + return (~0x6996 >> val) & 1; +} + +static inline void +OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE4_PKT | cnt | + (_odd_parity_bit(cnt) << 7) | + ((regindx & 0x3ffff) << 8) | + ((_odd_parity_bit(regindx) << 27))); +} + +static inline void +OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE7_PKT | cnt | + (_odd_parity_bit(cnt) << 15) | + ((opcode & 0x7f) << 16) | + ((_odd_parity_bit(opcode) << 23))); +} + static inline void OUT_WFI(struct fd_ringbuffer *ring) { @@ -273,10 +329,18 @@ OUT_WFI(struct fd_ringbuffer *ring) } static inline void +OUT_WFI5(struct fd_ringbuffer *ring) +{ + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); +} + +static inline void __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target) { unsigned count = fd_ringbuffer_cmd_count(target); + debug_assert(__gpu_id(ring) < 500); + /* for debug after a lock up, write a unique counter value * to scratch6 for each IB, to make it easier to match up * register dumps to cmdstream. The combination of IB and @@ -297,7 +361,34 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target emit_marker(ring, 6); } +static inline void +__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + unsigned count = fd_ringbuffer_cmd_count(target); + + /* for debug after a lock up, write a unique counter value + * to scratch6 for each IB, to make it easier to match up + * register dumps to cmdstream. The combination of IB and + * DRAW (scratch7) is enough to "triangulate" the particular + * draw that caused lockup. + */ + emit_marker5(ring, 6); + + for (unsigned i = 0; i < count; i++) { + uint32_t dwords; + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); + dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; + assert(dwords > 0); + OUT_RING(ring, dwords); + } + + emit_marker5(ring, 6); +} + /* CP_SCRATCH_REG4 is used to hold base address for query results: */ +// XXX annoyingly scratch regs move on a5xx.. and additionally different +// packet types.. so freedreno_query_hw is going to need a bit of +// rework.. #define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 static inline void @@ -312,6 +403,21 @@ emit_marker(struct fd_ringbuffer *ring, int scratch_idx) OUT_RING(ring, ++marker_cnt); } +static inline void +emit_marker5(struct fd_ringbuffer *ring, int scratch_idx) +{ + extern unsigned marker_cnt; +//XXX unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx); + unsigned reg = 0x00000b78 + scratch_idx; + assert(reg != HW_QUERY_BASE_REG); + if (reg == HW_QUERY_BASE_REG) + return; + OUT_WFI5(ring); + OUT_PKT4(ring, reg, 1); + OUT_RING(ring, ++marker_cnt); + OUT_WFI5(ring); +} + /* helper to get numeric value from environment variable.. mostly * just leaving this here because it is helpful to brute-force figure * out unknown formats, etc, which blob driver does not support: |