summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/freedreno
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/freedreno')
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h11
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h277
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h1042
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_context.h2
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c3
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_query.c138
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h13
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h42
-rw-r--r--src/gallium/drivers/freedreno/freedreno_context.h3
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query.h12
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.c33
-rw-r--r--src/gallium/drivers/freedreno/freedreno_query_hw.h6
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.c17
-rw-r--r--src/gallium/drivers/freedreno/freedreno_screen.h1
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c1
16 files changed, 1570 insertions, 34 deletions
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index d23111352b7..71ee55054d3 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index c4f253b836c..c6286a1f290 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -255,11 +256,273 @@ enum a3xx_color_fmt {
RB_R32G32B32A32_UINT = 59,
};
+enum a3xx_cp_perfcounter_select {
+ CP_ALWAYS_COUNT = 0,
+ CP_AHB_PFPTRANS_WAIT = 3,
+ CP_AHB_NRTTRANS_WAIT = 6,
+ CP_CSF_NRT_READ_WAIT = 8,
+ CP_CSF_I1_FIFO_FULL = 9,
+ CP_CSF_I2_FIFO_FULL = 10,
+ CP_CSF_ST_FIFO_FULL = 11,
+ CP_RESERVED_12 = 12,
+ CP_CSF_RING_ROQ_FULL = 13,
+ CP_CSF_I1_ROQ_FULL = 14,
+ CP_CSF_I2_ROQ_FULL = 15,
+ CP_CSF_ST_ROQ_FULL = 16,
+ CP_RESERVED_17 = 17,
+ CP_MIU_TAG_MEM_FULL = 18,
+ CP_MIU_NRT_WRITE_STALLED = 22,
+ CP_MIU_NRT_READ_STALLED = 23,
+ CP_ME_REGS_RB_DONE_FIFO_FULL = 26,
+ CP_ME_REGS_VS_EVENT_FIFO_FULL = 27,
+ CP_ME_REGS_PS_EVENT_FIFO_FULL = 28,
+ CP_ME_REGS_CF_EVENT_FIFO_FULL = 29,
+ CP_ME_MICRO_RB_STARVED = 30,
+ CP_AHB_RBBM_DWORD_SENT = 40,
+ CP_ME_BUSY_CLOCKS = 41,
+ CP_ME_WAIT_CONTEXT_AVAIL = 42,
+ CP_PFP_TYPE0_PACKET = 43,
+ CP_PFP_TYPE3_PACKET = 44,
+ CP_CSF_RB_WPTR_NEQ_RPTR = 45,
+ CP_CSF_I1_SIZE_NEQ_ZERO = 46,
+ CP_CSF_I2_SIZE_NEQ_ZERO = 47,
+ CP_CSF_RBI1I2_FETCHING = 48,
+};
+
+enum a3xx_gras_tse_perfcounter_select {
+ GRAS_TSEPERF_INPUT_PRIM = 0,
+ GRAS_TSEPERF_INPUT_NULL_PRIM = 1,
+ GRAS_TSEPERF_TRIVAL_REJ_PRIM = 2,
+ GRAS_TSEPERF_CLIPPED_PRIM = 3,
+ GRAS_TSEPERF_NEW_PRIM = 4,
+ GRAS_TSEPERF_ZERO_AREA_PRIM = 5,
+ GRAS_TSEPERF_FACENESS_CULLED_PRIM = 6,
+ GRAS_TSEPERF_ZERO_PIXEL_PRIM = 7,
+ GRAS_TSEPERF_OUTPUT_NULL_PRIM = 8,
+ GRAS_TSEPERF_OUTPUT_VISIBLE_PRIM = 9,
+ GRAS_TSEPERF_PRE_CLIP_PRIM = 10,
+ GRAS_TSEPERF_POST_CLIP_PRIM = 11,
+ GRAS_TSEPERF_WORKING_CYCLES = 12,
+ GRAS_TSEPERF_PC_STARVE = 13,
+ GRAS_TSERASPERF_STALL = 14,
+};
+
+enum a3xx_gras_ras_perfcounter_select {
+ GRAS_RASPERF_16X16_TILES = 0,
+ GRAS_RASPERF_8X8_TILES = 1,
+ GRAS_RASPERF_4X4_TILES = 2,
+ GRAS_RASPERF_WORKING_CYCLES = 3,
+ GRAS_RASPERF_STALL_CYCLES_BY_RB = 4,
+ GRAS_RASPERF_STALL_CYCLES_BY_VSC = 5,
+ GRAS_RASPERF_STARVE_CYCLES_BY_TSE = 6,
+};
+
+enum a3xx_hlsq_perfcounter_select {
+ HLSQ_PERF_SP_VS_CONSTANT = 0,
+ HLSQ_PERF_SP_VS_INSTRUCTIONS = 1,
+ HLSQ_PERF_SP_FS_CONSTANT = 2,
+ HLSQ_PERF_SP_FS_INSTRUCTIONS = 3,
+ HLSQ_PERF_TP_STATE = 4,
+ HLSQ_PERF_QUADS = 5,
+ HLSQ_PERF_PIXELS = 6,
+ HLSQ_PERF_VERTICES = 7,
+ HLSQ_PERF_FS8_THREADS = 8,
+ HLSQ_PERF_FS16_THREADS = 9,
+ HLSQ_PERF_FS32_THREADS = 10,
+ HLSQ_PERF_VS8_THREADS = 11,
+ HLSQ_PERF_VS16_THREADS = 12,
+ HLSQ_PERF_SP_VS_DATA_BYTES = 13,
+ HLSQ_PERF_SP_FS_DATA_BYTES = 14,
+ HLSQ_PERF_ACTIVE_CYCLES = 15,
+ HLSQ_PERF_STALL_CYCLES_SP_STATE = 16,
+ HLSQ_PERF_STALL_CYCLES_SP_VS = 17,
+ HLSQ_PERF_STALL_CYCLES_SP_FS = 18,
+ HLSQ_PERF_STALL_CYCLES_UCHE = 19,
+ HLSQ_PERF_RBBM_LOAD_CYCLES = 20,
+ HLSQ_PERF_DI_TO_VS_START_SP0 = 21,
+ HLSQ_PERF_DI_TO_FS_START_SP0 = 22,
+ HLSQ_PERF_VS_START_TO_DONE_SP0 = 23,
+ HLSQ_PERF_FS_START_TO_DONE_SP0 = 24,
+ HLSQ_PERF_SP_STATE_COPY_CYCLES_VS = 25,
+ HLSQ_PERF_SP_STATE_COPY_CYCLES_FS = 26,
+ HLSQ_PERF_UCHE_LATENCY_CYCLES = 27,
+ HLSQ_PERF_UCHE_LATENCY_COUNT = 28,
+};
+
+enum a3xx_pc_perfcounter_select {
+ PC_PCPERF_VISIBILITY_STREAMS = 0,
+ PC_PCPERF_TOTAL_INSTANCES = 1,
+ PC_PCPERF_PRIMITIVES_PC_VPC = 2,
+ PC_PCPERF_PRIMITIVES_KILLED_BY_VS = 3,
+ PC_PCPERF_PRIMITIVES_VISIBLE_BY_VS = 4,
+ PC_PCPERF_DRAWCALLS_KILLED_BY_VS = 5,
+ PC_PCPERF_DRAWCALLS_VISIBLE_BY_VS = 6,
+ PC_PCPERF_VERTICES_TO_VFD = 7,
+ PC_PCPERF_REUSED_VERTICES = 8,
+ PC_PCPERF_CYCLES_STALLED_BY_VFD = 9,
+ PC_PCPERF_CYCLES_STALLED_BY_TSE = 10,
+ PC_PCPERF_CYCLES_STALLED_BY_VBIF = 11,
+ PC_PCPERF_CYCLES_IS_WORKING = 12,
+};
+
+enum a3xx_rb_perfcounter_select {
+ RB_RBPERF_ACTIVE_CYCLES_ANY = 0,
+ RB_RBPERF_ACTIVE_CYCLES_ALL = 1,
+ RB_RBPERF_STARVE_CYCLES_BY_SP = 2,
+ RB_RBPERF_STARVE_CYCLES_BY_RAS = 3,
+ RB_RBPERF_STARVE_CYCLES_BY_MARB = 4,
+ RB_RBPERF_STALL_CYCLES_BY_MARB = 5,
+ RB_RBPERF_STALL_CYCLES_BY_HLSQ = 6,
+ RB_RBPERF_RB_MARB_DATA = 7,
+ RB_RBPERF_SP_RB_QUAD = 8,
+ RB_RBPERF_RAS_EARLY_Z_QUADS = 9,
+ RB_RBPERF_GMEM_CH0_READ = 10,
+ RB_RBPERF_GMEM_CH1_READ = 11,
+ RB_RBPERF_GMEM_CH0_WRITE = 12,
+ RB_RBPERF_GMEM_CH1_WRITE = 13,
+ RB_RBPERF_CP_CONTEXT_DONE = 14,
+ RB_RBPERF_CP_CACHE_FLUSH = 15,
+ RB_RBPERF_CP_ZPASS_DONE = 16,
+};
+
+enum a3xx_rbbm_perfcounter_select {
+ RBBM_ALAWYS_ON = 0,
+ RBBM_VBIF_BUSY = 1,
+ RBBM_TSE_BUSY = 2,
+ RBBM_RAS_BUSY = 3,
+ RBBM_PC_DCALL_BUSY = 4,
+ RBBM_PC_VSD_BUSY = 5,
+ RBBM_VFD_BUSY = 6,
+ RBBM_VPC_BUSY = 7,
+ RBBM_UCHE_BUSY = 8,
+ RBBM_VSC_BUSY = 9,
+ RBBM_HLSQ_BUSY = 10,
+ RBBM_ANY_RB_BUSY = 11,
+ RBBM_ANY_TEX_BUSY = 12,
+ RBBM_ANY_USP_BUSY = 13,
+ RBBM_ANY_MARB_BUSY = 14,
+ RBBM_ANY_ARB_BUSY = 15,
+ RBBM_AHB_STATUS_BUSY = 16,
+ RBBM_AHB_STATUS_STALLED = 17,
+ RBBM_AHB_STATUS_TXFR = 18,
+ RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+ RBBM_AHB_STATUS_TXFR_ERROR = 20,
+ RBBM_AHB_STATUS_LONG_STALL = 21,
+ RBBM_RBBM_STATUS_MASKED = 22,
+};
+
enum a3xx_sp_perfcounter_select {
+ SP_LM_LOAD_INSTRUCTIONS = 0,
+ SP_LM_STORE_INSTRUCTIONS = 1,
+ SP_LM_ATOMICS = 2,
+ SP_UCHE_LOAD_INSTRUCTIONS = 3,
+ SP_UCHE_STORE_INSTRUCTIONS = 4,
+ SP_UCHE_ATOMICS = 5,
+ SP_VS_TEX_INSTRUCTIONS = 6,
+ SP_VS_CFLOW_INSTRUCTIONS = 7,
+ SP_VS_EFU_INSTRUCTIONS = 8,
+ SP_VS_FULL_ALU_INSTRUCTIONS = 9,
+ SP_VS_HALF_ALU_INSTRUCTIONS = 10,
+ SP_FS_TEX_INSTRUCTIONS = 11,
SP_FS_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_EFU_INSTRUCTIONS = 13,
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
- SP0_ICL1_MISSES = 26,
+ SP_FS_HALF_ALU_INSTRUCTIONS = 15,
+ SP_FS_BARY_INSTRUCTIONS = 16,
+ SP_VS_INSTRUCTIONS = 17,
+ SP_FS_INSTRUCTIONS = 18,
+ SP_ADDR_LOCK_COUNT = 19,
+ SP_UCHE_READ_TRANS = 20,
+ SP_UCHE_WRITE_TRANS = 21,
+ SP_EXPORT_VPC_TRANS = 22,
+ SP_EXPORT_RB_TRANS = 23,
+ SP_PIXELS_KILLED = 24,
+ SP_ICL1_REQUESTS = 25,
+ SP_ICL1_MISSES = 26,
+ SP_ICL0_REQUESTS = 27,
+ SP_ICL0_MISSES = 28,
SP_ALU_ACTIVE_CYCLES = 29,
+ SP_EFU_ACTIVE_CYCLES = 30,
+ SP_STALL_CYCLES_BY_VPC = 31,
+ SP_STALL_CYCLES_BY_TP = 32,
+ SP_STALL_CYCLES_BY_UCHE = 33,
+ SP_STALL_CYCLES_BY_RB = 34,
+ SP_ACTIVE_CYCLES_ANY = 35,
+ SP_ACTIVE_CYCLES_ALL = 36,
+};
+
+enum a3xx_tp_perfcounter_select {
+ TPL1_TPPERF_L1_REQUESTS = 0,
+ TPL1_TPPERF_TP0_L1_REQUESTS = 1,
+ TPL1_TPPERF_TP0_L1_MISSES = 2,
+ TPL1_TPPERF_TP1_L1_REQUESTS = 3,
+ TPL1_TPPERF_TP1_L1_MISSES = 4,
+ TPL1_TPPERF_TP2_L1_REQUESTS = 5,
+ TPL1_TPPERF_TP2_L1_MISSES = 6,
+ TPL1_TPPERF_TP3_L1_REQUESTS = 7,
+ TPL1_TPPERF_TP3_L1_MISSES = 8,
+ TPL1_TPPERF_OUTPUT_TEXELS_POINT = 9,
+ TPL1_TPPERF_OUTPUT_TEXELS_BILINEAR = 10,
+ TPL1_TPPERF_OUTPUT_TEXELS_MIP = 11,
+ TPL1_TPPERF_OUTPUT_TEXELS_ANISO = 12,
+ TPL1_TPPERF_BILINEAR_OPS = 13,
+ TPL1_TPPERF_QUADSQUADS_OFFSET = 14,
+ TPL1_TPPERF_QUADQUADS_SHADOW = 15,
+ TPL1_TPPERF_QUADS_ARRAY = 16,
+ TPL1_TPPERF_QUADS_PROJECTION = 17,
+ TPL1_TPPERF_QUADS_GRADIENT = 18,
+ TPL1_TPPERF_QUADS_1D2D = 19,
+ TPL1_TPPERF_QUADS_3DCUBE = 20,
+ TPL1_TPPERF_ZERO_LOD = 21,
+ TPL1_TPPERF_OUTPUT_TEXELS = 22,
+ TPL1_TPPERF_ACTIVE_CYCLES_ANY = 23,
+ TPL1_TPPERF_ACTIVE_CYCLES_ALL = 24,
+ TPL1_TPPERF_STALL_CYCLES_BY_ARB = 25,
+ TPL1_TPPERF_LATENCY = 26,
+ TPL1_TPPERF_LATENCY_TRANS = 27,
+};
+
+enum a3xx_vfd_perfcounter_select {
+ VFD_PERF_UCHE_BYTE_FETCHED = 0,
+ VFD_PERF_UCHE_TRANS = 1,
+ VFD_PERF_VPC_BYPASS_COMPONENTS = 2,
+ VFD_PERF_FETCH_INSTRUCTIONS = 3,
+ VFD_PERF_DECODE_INSTRUCTIONS = 4,
+ VFD_PERF_ACTIVE_CYCLES = 5,
+ VFD_PERF_STALL_CYCLES_UCHE = 6,
+ VFD_PERF_STALL_CYCLES_HLSQ = 7,
+ VFD_PERF_STALL_CYCLES_VPC_BYPASS = 8,
+ VFD_PERF_STALL_CYCLES_VPC_ALLOC = 9,
+};
+
+enum a3xx_vpc_perfcounter_select {
+ VPC_PERF_SP_LM_PRIMITIVES = 0,
+ VPC_PERF_COMPONENTS_FROM_SP = 1,
+ VPC_PERF_SP_LM_COMPONENTS = 2,
+ VPC_PERF_ACTIVE_CYCLES = 3,
+ VPC_PERF_STALL_CYCLES_LM = 4,
+ VPC_PERF_STALL_CYCLES_RAS = 5,
+};
+
+enum a3xx_uche_perfcounter_select {
+ UCHE_UCHEPERF_VBIF_READ_BEATS_TP = 0,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_VFD = 1,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_HLSQ = 2,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_MARB = 3,
+ UCHE_UCHEPERF_VBIF_READ_BEATS_SP = 4,
+ UCHE_UCHEPERF_READ_REQUESTS_TP = 8,
+ UCHE_UCHEPERF_READ_REQUESTS_VFD = 9,
+ UCHE_UCHEPERF_READ_REQUESTS_HLSQ = 10,
+ UCHE_UCHEPERF_READ_REQUESTS_MARB = 11,
+ UCHE_UCHEPERF_READ_REQUESTS_SP = 12,
+ UCHE_UCHEPERF_WRITE_REQUESTS_MARB = 13,
+ UCHE_UCHEPERF_WRITE_REQUESTS_SP = 14,
+ UCHE_UCHEPERF_TAG_CHECK_FAILS = 15,
+ UCHE_UCHEPERF_EVICTS = 16,
+ UCHE_UCHEPERF_FLUSHES = 17,
+ UCHE_UCHEPERF_VBIF_LATENCY_CYCLES = 18,
+ UCHE_UCHEPERF_VBIF_LATENCY_SAMPLES = 19,
+ UCHE_UCHEPERF_ACTIVE_CYCLES = 20,
};
enum a3xx_rb_blend_opcode {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 811f58bbba2..8c37992e17d 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "fd3_emit.h"
#include "fd3_blend.h"
@@ -888,6 +889,8 @@ fd3_emit_restore(struct fd_context *ctx)
fd_wfi(ctx, ring);
+ fd_hw_query_enable(ctx, ring);
+
ctx->needs_rb_fbd = true;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index e8df429441e..d6fd1bb583e 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -271,6 +272,545 @@ enum a4xx_tess_spacing {
EVEN_SPACING = 3,
};
+enum a4xx_ccu_perfcounter_select {
+ CCU_BUSY_CYCLES = 0,
+ CCU_RB_DEPTH_RETURN_STALL = 2,
+ CCU_RB_COLOR_RETURN_STALL = 3,
+ CCU_DEPTH_BLOCKS = 6,
+ CCU_COLOR_BLOCKS = 7,
+ CCU_DEPTH_BLOCK_HIT = 8,
+ CCU_COLOR_BLOCK_HIT = 9,
+ CCU_DEPTH_FLAG1_COUNT = 10,
+ CCU_DEPTH_FLAG2_COUNT = 11,
+ CCU_DEPTH_FLAG3_COUNT = 12,
+ CCU_DEPTH_FLAG4_COUNT = 13,
+ CCU_COLOR_FLAG1_COUNT = 14,
+ CCU_COLOR_FLAG2_COUNT = 15,
+ CCU_COLOR_FLAG3_COUNT = 16,
+ CCU_COLOR_FLAG4_COUNT = 17,
+ CCU_PARTIAL_BLOCK_READ = 18,
+};
+
+enum a4xx_cp_perfcounter_select {
+ CP_ALWAYS_COUNT = 0,
+ CP_BUSY = 1,
+ CP_PFP_IDLE = 2,
+ CP_PFP_BUSY_WORKING = 3,
+ CP_PFP_STALL_CYCLES_ANY = 4,
+ CP_PFP_STARVE_CYCLES_ANY = 5,
+ CP_PFP_STARVED_PER_LOAD_ADDR = 6,
+ CP_PFP_STALLED_PER_STORE_ADDR = 7,
+ CP_PFP_PC_PROFILE = 8,
+ CP_PFP_MATCH_PM4_PKT_PROFILE = 9,
+ CP_PFP_COND_INDIRECT_DISCARDED = 10,
+ CP_LONG_RESUMPTIONS = 11,
+ CP_RESUME_CYCLES = 12,
+ CP_RESUME_TO_BOUNDARY_CYCLES = 13,
+ CP_LONG_PREEMPTIONS = 14,
+ CP_PREEMPT_CYCLES = 15,
+ CP_PREEMPT_TO_BOUNDARY_CYCLES = 16,
+ CP_ME_FIFO_EMPTY_PFP_IDLE = 17,
+ CP_ME_FIFO_EMPTY_PFP_BUSY = 18,
+ CP_ME_FIFO_NOT_EMPTY_NOT_FULL = 19,
+ CP_ME_FIFO_FULL_ME_BUSY = 20,
+ CP_ME_FIFO_FULL_ME_NON_WORKING = 21,
+ CP_ME_WAITING_FOR_PACKETS = 22,
+ CP_ME_BUSY_WORKING = 23,
+ CP_ME_STARVE_CYCLES_ANY = 24,
+ CP_ME_STARVE_CYCLES_PER_PROFILE = 25,
+ CP_ME_STALL_CYCLES_PER_PROFILE = 26,
+ CP_ME_PC_PROFILE = 27,
+ CP_RCIU_FIFO_EMPTY = 28,
+ CP_RCIU_FIFO_NOT_EMPTY_NOT_FULL = 29,
+ CP_RCIU_FIFO_FULL = 30,
+ CP_RCIU_FIFO_FULL_NO_CONTEXT = 31,
+ CP_RCIU_FIFO_FULL_AHB_MASTER = 32,
+ CP_RCIU_FIFO_FULL_OTHER = 33,
+ CP_AHB_IDLE = 34,
+ CP_AHB_STALL_ON_GRANT_NO_SPLIT = 35,
+ CP_AHB_STALL_ON_GRANT_SPLIT = 36,
+ CP_AHB_STALL_ON_GRANT_SPLIT_PROFILE = 37,
+ CP_AHB_BUSY_WORKING = 38,
+ CP_AHB_BUSY_STALL_ON_HRDY = 39,
+ CP_AHB_BUSY_STALL_ON_HRDY_PROFILE = 40,
+};
+
+enum a4xx_gras_ras_perfcounter_select {
+ RAS_SUPER_TILES = 0,
+ RAS_8X8_TILES = 1,
+ RAS_4X4_TILES = 2,
+ RAS_BUSY_CYCLES = 3,
+ RAS_STALL_CYCLES_BY_RB = 4,
+ RAS_STALL_CYCLES_BY_VSC = 5,
+ RAS_STARVE_CYCLES_BY_TSE = 6,
+ RAS_SUPERTILE_CYCLES = 7,
+ RAS_TILE_CYCLES = 8,
+ RAS_FULLY_COVERED_SUPER_TILES = 9,
+ RAS_FULLY_COVERED_8X8_TILES = 10,
+ RAS_4X4_PRIM = 11,
+ RAS_8X4_4X8_PRIM = 12,
+ RAS_8X8_PRIM = 13,
+};
+
+enum a4xx_gras_tse_perfcounter_select {
+ TSE_INPUT_PRIM = 0,
+ TSE_INPUT_NULL_PRIM = 1,
+ TSE_TRIVAL_REJ_PRIM = 2,
+ TSE_CLIPPED_PRIM = 3,
+ TSE_NEW_PRIM = 4,
+ TSE_ZERO_AREA_PRIM = 5,
+ TSE_FACENESS_CULLED_PRIM = 6,
+ TSE_ZERO_PIXEL_PRIM = 7,
+ TSE_OUTPUT_NULL_PRIM = 8,
+ TSE_OUTPUT_VISIBLE_PRIM = 9,
+ TSE_PRE_CLIP_PRIM = 10,
+ TSE_POST_CLIP_PRIM = 11,
+ TSE_BUSY_CYCLES = 12,
+ TSE_PC_STARVE = 13,
+ TSE_RAS_STALL = 14,
+ TSE_STALL_BARYPLANE_FIFO_FULL = 15,
+ TSE_STALL_ZPLANE_FIFO_FULL = 16,
+};
+
+enum a4xx_hlsq_perfcounter_select {
+ HLSQ_SP_VS_STAGE_CONSTANT = 0,
+ HLSQ_SP_VS_STAGE_INSTRUCTIONS = 1,
+ HLSQ_SP_FS_STAGE_CONSTANT = 2,
+ HLSQ_SP_FS_STAGE_INSTRUCTIONS = 3,
+ HLSQ_TP_STATE = 4,
+ HLSQ_QUADS = 5,
+ HLSQ_PIXELS = 6,
+ HLSQ_VERTICES = 7,
+ HLSQ_SP_VS_STAGE_DATA_BYTES = 13,
+ HLSQ_SP_FS_STAGE_DATA_BYTES = 14,
+ HLSQ_BUSY_CYCLES = 15,
+ HLSQ_STALL_CYCLES_SP_STATE = 16,
+ HLSQ_STALL_CYCLES_SP_VS_STAGE = 17,
+ HLSQ_STALL_CYCLES_SP_FS_STAGE = 18,
+ HLSQ_STALL_CYCLES_UCHE = 19,
+ HLSQ_RBBM_LOAD_CYCLES = 20,
+ HLSQ_DI_TO_VS_START_SP = 21,
+ HLSQ_DI_TO_FS_START_SP = 22,
+ HLSQ_VS_STAGE_START_TO_DONE_SP = 23,
+ HLSQ_FS_STAGE_START_TO_DONE_SP = 24,
+ HLSQ_SP_STATE_COPY_CYCLES_VS_STAGE = 25,
+ HLSQ_SP_STATE_COPY_CYCLES_FS_STAGE = 26,
+ HLSQ_UCHE_LATENCY_CYCLES = 27,
+ HLSQ_UCHE_LATENCY_COUNT = 28,
+ HLSQ_STARVE_CYCLES_VFD = 29,
+};
+
+enum a4xx_pc_perfcounter_select {
+ PC_VIS_STREAMS_LOADED = 0,
+ PC_VPC_PRIMITIVES = 2,
+ PC_DEAD_PRIM = 3,
+ PC_LIVE_PRIM = 4,
+ PC_DEAD_DRAWCALLS = 5,
+ PC_LIVE_DRAWCALLS = 6,
+ PC_VERTEX_MISSES = 7,
+ PC_STALL_CYCLES_VFD = 9,
+ PC_STALL_CYCLES_TSE = 10,
+ PC_STALL_CYCLES_UCHE = 11,
+ PC_WORKING_CYCLES = 12,
+ PC_IA_VERTICES = 13,
+ PC_GS_PRIMITIVES = 14,
+ PC_HS_INVOCATIONS = 15,
+ PC_DS_INVOCATIONS = 16,
+ PC_DS_PRIMITIVES = 17,
+ PC_STARVE_CYCLES_FOR_INDEX = 20,
+ PC_STARVE_CYCLES_FOR_TESS_FACTOR = 21,
+ PC_STARVE_CYCLES_FOR_VIZ_STREAM = 22,
+ PC_STALL_CYCLES_TESS = 23,
+ PC_STARVE_CYCLES_FOR_POSITION = 24,
+ PC_MODE0_DRAWCALL = 25,
+ PC_MODE1_DRAWCALL = 26,
+ PC_MODE2_DRAWCALL = 27,
+ PC_MODE3_DRAWCALL = 28,
+ PC_MODE4_DRAWCALL = 29,
+ PC_PREDICATED_DEAD_DRAWCALL = 30,
+ PC_STALL_CYCLES_BY_TSE_ONLY = 31,
+ PC_STALL_CYCLES_BY_VPC_ONLY = 32,
+ PC_VPC_POS_DATA_TRANSACTION = 33,
+ PC_BUSY_CYCLES = 34,
+ PC_STARVE_CYCLES_DI = 35,
+ PC_STALL_CYCLES_VPC = 36,
+ TESS_WORKING_CYCLES = 37,
+ TESS_NUM_CYCLES_SETUP_WORKING = 38,
+ TESS_NUM_CYCLES_PTGEN_WORKING = 39,
+ TESS_NUM_CYCLES_CONNGEN_WORKING = 40,
+ TESS_BUSY_CYCLES = 41,
+ TESS_STARVE_CYCLES_PC = 42,
+ TESS_STALL_CYCLES_PC = 43,
+};
+
+enum a4xx_pwr_perfcounter_select {
+ PWR_CORE_CLOCK_CYCLES = 0,
+ PWR_BUSY_CLOCK_CYCLES = 1,
+};
+
+enum a4xx_rb_perfcounter_select {
+ RB_BUSY_CYCLES = 0,
+ RB_BUSY_CYCLES_BINNING = 1,
+ RB_BUSY_CYCLES_RENDERING = 2,
+ RB_BUSY_CYCLES_RESOLVE = 3,
+ RB_STARVE_CYCLES_BY_SP = 4,
+ RB_STARVE_CYCLES_BY_RAS = 5,
+ RB_STARVE_CYCLES_BY_MARB = 6,
+ RB_STALL_CYCLES_BY_MARB = 7,
+ RB_STALL_CYCLES_BY_HLSQ = 8,
+ RB_RB_RB_MARB_DATA = 9,
+ RB_SP_RB_QUAD = 10,
+ RB_RAS_RB_Z_QUADS = 11,
+ RB_GMEM_CH0_READ = 12,
+ RB_GMEM_CH1_READ = 13,
+ RB_GMEM_CH0_WRITE = 14,
+ RB_GMEM_CH1_WRITE = 15,
+ RB_CP_CONTEXT_DONE = 16,
+ RB_CP_CACHE_FLUSH = 17,
+ RB_CP_ZPASS_DONE = 18,
+ RB_STALL_FIFO0_FULL = 19,
+ RB_STALL_FIFO1_FULL = 20,
+ RB_STALL_FIFO2_FULL = 21,
+ RB_STALL_FIFO3_FULL = 22,
+ RB_RB_HLSQ_TRANSACTIONS = 23,
+ RB_Z_READ = 24,
+ RB_Z_WRITE = 25,
+ RB_C_READ = 26,
+ RB_C_WRITE = 27,
+ RB_C_READ_LATENCY = 28,
+ RB_Z_READ_LATENCY = 29,
+ RB_STALL_BY_UCHE = 30,
+ RB_MARB_UCHE_TRANSACTIONS = 31,
+ RB_CACHE_STALL_MISS = 32,
+ RB_CACHE_STALL_FIFO_FULL = 33,
+ RB_8BIT_BLENDER_UNITS_ACTIVE = 34,
+ RB_16BIT_BLENDER_UNITS_ACTIVE = 35,
+ RB_SAMPLER_UNITS_ACTIVE = 36,
+ RB_TOTAL_PASS = 38,
+ RB_Z_PASS = 39,
+ RB_Z_FAIL = 40,
+ RB_S_FAIL = 41,
+ RB_POWER0 = 42,
+ RB_POWER1 = 43,
+ RB_POWER2 = 44,
+ RB_POWER3 = 45,
+ RB_POWER4 = 46,
+ RB_POWER5 = 47,
+ RB_POWER6 = 48,
+ RB_POWER7 = 49,
+};
+
+enum a4xx_rbbm_perfcounter_select {
+ RBBM_ALWAYS_ON = 0,
+ RBBM_VBIF_BUSY = 1,
+ RBBM_TSE_BUSY = 2,
+ RBBM_RAS_BUSY = 3,
+ RBBM_PC_DCALL_BUSY = 4,
+ RBBM_PC_VSD_BUSY = 5,
+ RBBM_VFD_BUSY = 6,
+ RBBM_VPC_BUSY = 7,
+ RBBM_UCHE_BUSY = 8,
+ RBBM_VSC_BUSY = 9,
+ RBBM_HLSQ_BUSY = 10,
+ RBBM_ANY_RB_BUSY = 11,
+ RBBM_ANY_TPL1_BUSY = 12,
+ RBBM_ANY_SP_BUSY = 13,
+ RBBM_ANY_MARB_BUSY = 14,
+ RBBM_ANY_ARB_BUSY = 15,
+ RBBM_AHB_STATUS_BUSY = 16,
+ RBBM_AHB_STATUS_STALLED = 17,
+ RBBM_AHB_STATUS_TXFR = 18,
+ RBBM_AHB_STATUS_TXFR_SPLIT = 19,
+ RBBM_AHB_STATUS_TXFR_ERROR = 20,
+ RBBM_AHB_STATUS_LONG_STALL = 21,
+ RBBM_STATUS_MASKED = 22,
+ RBBM_CP_BUSY_GFX_CORE_IDLE = 23,
+ RBBM_TESS_BUSY = 24,
+ RBBM_COM_BUSY = 25,
+ RBBM_DCOM_BUSY = 32,
+ RBBM_ANY_CCU_BUSY = 33,
+ RBBM_DPM_BUSY = 34,
+};
+
+enum a4xx_sp_perfcounter_select {
+ SP_LM_LOAD_INSTRUCTIONS = 0,
+ SP_LM_STORE_INSTRUCTIONS = 1,
+ SP_LM_ATOMICS = 2,
+ SP_GM_LOAD_INSTRUCTIONS = 3,
+ SP_GM_STORE_INSTRUCTIONS = 4,
+ SP_GM_ATOMICS = 5,
+ SP_VS_STAGE_TEX_INSTRUCTIONS = 6,
+ SP_VS_STAGE_CFLOW_INSTRUCTIONS = 7,
+ SP_VS_STAGE_EFU_INSTRUCTIONS = 8,
+ SP_VS_STAGE_FULL_ALU_INSTRUCTIONS = 9,
+ SP_VS_STAGE_HALF_ALU_INSTRUCTIONS = 10,
+ SP_FS_STAGE_TEX_INSTRUCTIONS = 11,
+ SP_FS_STAGE_CFLOW_INSTRUCTIONS = 12,
+ SP_FS_STAGE_EFU_INSTRUCTIONS = 13,
+ SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = 14,
+ SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = 15,
+ SP_VS_INSTRUCTIONS = 17,
+ SP_FS_INSTRUCTIONS = 18,
+ SP_ADDR_LOCK_COUNT = 19,
+ SP_UCHE_READ_TRANS = 20,
+ SP_UCHE_WRITE_TRANS = 21,
+ SP_EXPORT_VPC_TRANS = 22,
+ SP_EXPORT_RB_TRANS = 23,
+ SP_PIXELS_KILLED = 24,
+ SP_ICL1_REQUESTS = 25,
+ SP_ICL1_MISSES = 26,
+ SP_ICL0_REQUESTS = 27,
+ SP_ICL0_MISSES = 28,
+ SP_ALU_WORKING_CYCLES = 29,
+ SP_EFU_WORKING_CYCLES = 30,
+ SP_STALL_CYCLES_BY_VPC = 31,
+ SP_STALL_CYCLES_BY_TP = 32,
+ SP_STALL_CYCLES_BY_UCHE = 33,
+ SP_STALL_CYCLES_BY_RB = 34,
+ SP_BUSY_CYCLES = 35,
+ SP_HS_INSTRUCTIONS = 36,
+ SP_DS_INSTRUCTIONS = 37,
+ SP_GS_INSTRUCTIONS = 38,
+ SP_CS_INSTRUCTIONS = 39,
+ SP_SCHEDULER_NON_WORKING = 40,
+ SP_WAVE_CONTEXTS = 41,
+ SP_WAVE_CONTEXT_CYCLES = 42,
+ SP_POWER0 = 43,
+ SP_POWER1 = 44,
+ SP_POWER2 = 45,
+ SP_POWER3 = 46,
+ SP_POWER4 = 47,
+ SP_POWER5 = 48,
+ SP_POWER6 = 49,
+ SP_POWER7 = 50,
+ SP_POWER8 = 51,
+ SP_POWER9 = 52,
+ SP_POWER10 = 53,
+ SP_POWER11 = 54,
+ SP_POWER12 = 55,
+ SP_POWER13 = 56,
+ SP_POWER14 = 57,
+ SP_POWER15 = 58,
+};
+
+enum a4xx_tp_perfcounter_select {
+ TP_L1_REQUESTS = 0,
+ TP_L1_MISSES = 1,
+ TP_QUADS_OFFSET = 8,
+ TP_QUAD_SHADOW = 9,
+ TP_QUADS_ARRAY = 10,
+ TP_QUADS_GRADIENT = 11,
+ TP_QUADS_1D2D = 12,
+ TP_QUADS_3DCUBE = 13,
+ TP_BUSY_CYCLES = 16,
+ TP_STALL_CYCLES_BY_ARB = 17,
+ TP_STATE_CACHE_REQUESTS = 20,
+ TP_STATE_CACHE_MISSES = 21,
+ TP_POWER0 = 22,
+ TP_POWER1 = 23,
+ TP_POWER2 = 24,
+ TP_POWER3 = 25,
+ TP_POWER4 = 26,
+ TP_POWER5 = 27,
+ TP_POWER6 = 28,
+ TP_POWER7 = 29,
+};
+
+enum a4xx_uche_perfcounter_select {
+ UCHE_VBIF_READ_BEATS_TP = 0,
+ UCHE_VBIF_READ_BEATS_VFD = 1,
+ UCHE_VBIF_READ_BEATS_HLSQ = 2,
+ UCHE_VBIF_READ_BEATS_MARB = 3,
+ UCHE_VBIF_READ_BEATS_SP = 4,
+ UCHE_READ_REQUESTS_TP = 5,
+ UCHE_READ_REQUESTS_VFD = 6,
+ UCHE_READ_REQUESTS_HLSQ = 7,
+ UCHE_READ_REQUESTS_MARB = 8,
+ UCHE_READ_REQUESTS_SP = 9,
+ UCHE_WRITE_REQUESTS_MARB = 10,
+ UCHE_WRITE_REQUESTS_SP = 11,
+ UCHE_TAG_CHECK_FAILS = 12,
+ UCHE_EVICTS = 13,
+ UCHE_FLUSHES = 14,
+ UCHE_VBIF_LATENCY_CYCLES = 15,
+ UCHE_VBIF_LATENCY_SAMPLES = 16,
+ UCHE_BUSY_CYCLES = 17,
+ UCHE_VBIF_READ_BEATS_PC = 18,
+ UCHE_READ_REQUESTS_PC = 19,
+ UCHE_WRITE_REQUESTS_VPC = 20,
+ UCHE_STALL_BY_VBIF = 21,
+ UCHE_WRITE_REQUESTS_VSC = 22,
+ UCHE_POWER0 = 23,
+ UCHE_POWER1 = 24,
+ UCHE_POWER2 = 25,
+ UCHE_POWER3 = 26,
+ UCHE_POWER4 = 27,
+ UCHE_POWER5 = 28,
+ UCHE_POWER6 = 29,
+ UCHE_POWER7 = 30,
+};
+
+enum a4xx_vbif_perfcounter_select {
+ AXI_READ_REQUESTS_ID_0 = 0,
+ AXI_READ_REQUESTS_ID_1 = 1,
+ AXI_READ_REQUESTS_ID_2 = 2,
+ AXI_READ_REQUESTS_ID_3 = 3,
+ AXI_READ_REQUESTS_ID_4 = 4,
+ AXI_READ_REQUESTS_ID_5 = 5,
+ AXI_READ_REQUESTS_ID_6 = 6,
+ AXI_READ_REQUESTS_ID_7 = 7,
+ AXI_READ_REQUESTS_ID_8 = 8,
+ AXI_READ_REQUESTS_ID_9 = 9,
+ AXI_READ_REQUESTS_ID_10 = 10,
+ AXI_READ_REQUESTS_ID_11 = 11,
+ AXI_READ_REQUESTS_ID_12 = 12,
+ AXI_READ_REQUESTS_ID_13 = 13,
+ AXI_READ_REQUESTS_ID_14 = 14,
+ AXI_READ_REQUESTS_ID_15 = 15,
+ AXI0_READ_REQUESTS_TOTAL = 16,
+ AXI1_READ_REQUESTS_TOTAL = 17,
+ AXI2_READ_REQUESTS_TOTAL = 18,
+ AXI3_READ_REQUESTS_TOTAL = 19,
+ AXI_READ_REQUESTS_TOTAL = 20,
+ AXI_WRITE_REQUESTS_ID_0 = 21,
+ AXI_WRITE_REQUESTS_ID_1 = 22,
+ AXI_WRITE_REQUESTS_ID_2 = 23,
+ AXI_WRITE_REQUESTS_ID_3 = 24,
+ AXI_WRITE_REQUESTS_ID_4 = 25,
+ AXI_WRITE_REQUESTS_ID_5 = 26,
+ AXI_WRITE_REQUESTS_ID_6 = 27,
+ AXI_WRITE_REQUESTS_ID_7 = 28,
+ AXI_WRITE_REQUESTS_ID_8 = 29,
+ AXI_WRITE_REQUESTS_ID_9 = 30,
+ AXI_WRITE_REQUESTS_ID_10 = 31,
+ AXI_WRITE_REQUESTS_ID_11 = 32,
+ AXI_WRITE_REQUESTS_ID_12 = 33,
+ AXI_WRITE_REQUESTS_ID_13 = 34,
+ AXI_WRITE_REQUESTS_ID_14 = 35,
+ AXI_WRITE_REQUESTS_ID_15 = 36,
+ AXI0_WRITE_REQUESTS_TOTAL = 37,
+ AXI1_WRITE_REQUESTS_TOTAL = 38,
+ AXI2_WRITE_REQUESTS_TOTAL = 39,
+ AXI3_WRITE_REQUESTS_TOTAL = 40,
+ AXI_WRITE_REQUESTS_TOTAL = 41,
+ AXI_TOTAL_REQUESTS = 42,
+ AXI_READ_DATA_BEATS_ID_0 = 43,
+ AXI_READ_DATA_BEATS_ID_1 = 44,
+ AXI_READ_DATA_BEATS_ID_2 = 45,
+ AXI_READ_DATA_BEATS_ID_3 = 46,
+ AXI_READ_DATA_BEATS_ID_4 = 47,
+ AXI_READ_DATA_BEATS_ID_5 = 48,
+ AXI_READ_DATA_BEATS_ID_6 = 49,
+ AXI_READ_DATA_BEATS_ID_7 = 50,
+ AXI_READ_DATA_BEATS_ID_8 = 51,
+ AXI_READ_DATA_BEATS_ID_9 = 52,
+ AXI_READ_DATA_BEATS_ID_10 = 53,
+ AXI_READ_DATA_BEATS_ID_11 = 54,
+ AXI_READ_DATA_BEATS_ID_12 = 55,
+ AXI_READ_DATA_BEATS_ID_13 = 56,
+ AXI_READ_DATA_BEATS_ID_14 = 57,
+ AXI_READ_DATA_BEATS_ID_15 = 58,
+ AXI0_READ_DATA_BEATS_TOTAL = 59,
+ AXI1_READ_DATA_BEATS_TOTAL = 60,
+ AXI2_READ_DATA_BEATS_TOTAL = 61,
+ AXI3_READ_DATA_BEATS_TOTAL = 62,
+ AXI_READ_DATA_BEATS_TOTAL = 63,
+ AXI_WRITE_DATA_BEATS_ID_0 = 64,
+ AXI_WRITE_DATA_BEATS_ID_1 = 65,
+ AXI_WRITE_DATA_BEATS_ID_2 = 66,
+ AXI_WRITE_DATA_BEATS_ID_3 = 67,
+ AXI_WRITE_DATA_BEATS_ID_4 = 68,
+ AXI_WRITE_DATA_BEATS_ID_5 = 69,
+ AXI_WRITE_DATA_BEATS_ID_6 = 70,
+ AXI_WRITE_DATA_BEATS_ID_7 = 71,
+ AXI_WRITE_DATA_BEATS_ID_8 = 72,
+ AXI_WRITE_DATA_BEATS_ID_9 = 73,
+ AXI_WRITE_DATA_BEATS_ID_10 = 74,
+ AXI_WRITE_DATA_BEATS_ID_11 = 75,
+ AXI_WRITE_DATA_BEATS_ID_12 = 76,
+ AXI_WRITE_DATA_BEATS_ID_13 = 77,
+ AXI_WRITE_DATA_BEATS_ID_14 = 78,
+ AXI_WRITE_DATA_BEATS_ID_15 = 79,
+ AXI0_WRITE_DATA_BEATS_TOTAL = 80,
+ AXI1_WRITE_DATA_BEATS_TOTAL = 81,
+ AXI2_WRITE_DATA_BEATS_TOTAL = 82,
+ AXI3_WRITE_DATA_BEATS_TOTAL = 83,
+ AXI_WRITE_DATA_BEATS_TOTAL = 84,
+ AXI_DATA_BEATS_TOTAL = 85,
+ CYCLES_HELD_OFF_ID_0 = 86,
+ CYCLES_HELD_OFF_ID_1 = 87,
+ CYCLES_HELD_OFF_ID_2 = 88,
+ CYCLES_HELD_OFF_ID_3 = 89,
+ CYCLES_HELD_OFF_ID_4 = 90,
+ CYCLES_HELD_OFF_ID_5 = 91,
+ CYCLES_HELD_OFF_ID_6 = 92,
+ CYCLES_HELD_OFF_ID_7 = 93,
+ CYCLES_HELD_OFF_ID_8 = 94,
+ CYCLES_HELD_OFF_ID_9 = 95,
+ CYCLES_HELD_OFF_ID_10 = 96,
+ CYCLES_HELD_OFF_ID_11 = 97,
+ CYCLES_HELD_OFF_ID_12 = 98,
+ CYCLES_HELD_OFF_ID_13 = 99,
+ CYCLES_HELD_OFF_ID_14 = 100,
+ CYCLES_HELD_OFF_ID_15 = 101,
+ AXI_READ_REQUEST_HELD_OFF = 102,
+ AXI_WRITE_REQUEST_HELD_OFF = 103,
+ AXI_REQUEST_HELD_OFF = 104,
+ AXI_WRITE_DATA_HELD_OFF = 105,
+ OCMEM_AXI_READ_REQUEST_HELD_OFF = 106,
+ OCMEM_AXI_WRITE_REQUEST_HELD_OFF = 107,
+ OCMEM_AXI_REQUEST_HELD_OFF = 108,
+ OCMEM_AXI_WRITE_DATA_HELD_OFF = 109,
+ ELAPSED_CYCLES_DDR = 110,
+ ELAPSED_CYCLES_OCMEM = 111,
+};
+
+enum a4xx_vfd_perfcounter_select {
+ VFD_UCHE_BYTE_FETCHED = 0,
+ VFD_UCHE_TRANS = 1,
+ VFD_FETCH_INSTRUCTIONS = 3,
+ VFD_BUSY_CYCLES = 5,
+ VFD_STALL_CYCLES_UCHE = 6,
+ VFD_STALL_CYCLES_HLSQ = 7,
+ VFD_STALL_CYCLES_VPC_BYPASS = 8,
+ VFD_STALL_CYCLES_VPC_ALLOC = 9,
+ VFD_MODE_0_FIBERS = 13,
+ VFD_MODE_1_FIBERS = 14,
+ VFD_MODE_2_FIBERS = 15,
+ VFD_MODE_3_FIBERS = 16,
+ VFD_MODE_4_FIBERS = 17,
+ VFD_BFIFO_STALL = 18,
+ VFD_NUM_VERTICES_TOTAL = 19,
+ VFD_PACKER_FULL = 20,
+ VFD_UCHE_REQUEST_FIFO_FULL = 21,
+ VFD_STARVE_CYCLES_PC = 22,
+ VFD_STARVE_CYCLES_UCHE = 23,
+};
+
+enum a4xx_vpc_perfcounter_select {
+ VPC_SP_LM_COMPONENTS = 2,
+ VPC_SP0_LM_BYTES = 3,
+ VPC_SP1_LM_BYTES = 4,
+ VPC_SP2_LM_BYTES = 5,
+ VPC_SP3_LM_BYTES = 6,
+ VPC_WORKING_CYCLES = 7,
+ VPC_STALL_CYCLES_LM = 8,
+ VPC_STARVE_CYCLES_RAS = 9,
+ VPC_STREAMOUT_CYCLES = 10,
+ VPC_UCHE_TRANSACTIONS = 12,
+ VPC_STALL_CYCLES_UCHE = 13,
+ VPC_BUSY_CYCLES = 14,
+ VPC_STARVE_CYCLES_SP = 15,
+};
+
+enum a4xx_vsc_perfcounter_select {
+ VSC_BUSY_CYCLES = 0,
+ VSC_WORKING_CYCLES = 1,
+ VSC_STALL_CYCLES_UCHE = 2,
+ VSC_STARVE_CYCLES_RAS = 3,
+ VSC_EOT_NUM = 4,
+};
+
enum a4xx_tex_filter {
A4XX_TEX_NEAREST = 0,
A4XX_TEX_LINEAR = 1,
@@ -357,6 +897,12 @@ static inline uint32_t A4XX_CGC_HLSQ_EARLY_CYC(uint32_t val)
#define REG_A4XX_RB_PERFCTR_RB_SEL_7 0x00000cce
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_0 0x00000ccf
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_1 0x00000cd0
+
+#define REG_A4XX_RB_PERFCTR_CCU_SEL_2 0x00000cd1
+
#define REG_A4XX_RB_PERFCTR_CCU_SEL_3 0x00000cd2
#define REG_A4XX_RB_FRAME_BUFFER_DIMENSION 0x00000ce0
@@ -1070,6 +1616,380 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x
#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c
+#define REG_A4XX_RBBM_PERFCTR_CP_0_HI 0x0000009d
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_LO 0x0000009e
+
+#define REG_A4XX_RBBM_PERFCTR_CP_1_HI 0x0000009f
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_LO 0x000000a0
+
+#define REG_A4XX_RBBM_PERFCTR_CP_2_HI 0x000000a1
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_LO 0x000000a2
+
+#define REG_A4XX_RBBM_PERFCTR_CP_3_HI 0x000000a3
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_LO 0x000000a4
+
+#define REG_A4XX_RBBM_PERFCTR_CP_4_HI 0x000000a5
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_LO 0x000000a6
+
+#define REG_A4XX_RBBM_PERFCTR_CP_5_HI 0x000000a7
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_LO 0x000000a8
+
+#define REG_A4XX_RBBM_PERFCTR_CP_6_HI 0x000000a9
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_LO 0x000000aa
+
+#define REG_A4XX_RBBM_PERFCTR_CP_7_HI 0x000000ab
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_LO 0x000000ac
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_0_HI 0x000000ad
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_LO 0x000000ae
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_1_HI 0x000000af
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_LO 0x000000b0
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_2_HI 0x000000b1
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_LO 0x000000b2
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_3_HI 0x000000b3
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_LO 0x000000b4
+
+#define REG_A4XX_RBBM_PERFCTR_PC_0_HI 0x000000b5
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_LO 0x000000b6
+
+#define REG_A4XX_RBBM_PERFCTR_PC_1_HI 0x000000b7
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_LO 0x000000b8
+
+#define REG_A4XX_RBBM_PERFCTR_PC_2_HI 0x000000b9
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_LO 0x000000ba
+
+#define REG_A4XX_RBBM_PERFCTR_PC_3_HI 0x000000bb
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_LO 0x000000bc
+
+#define REG_A4XX_RBBM_PERFCTR_PC_4_HI 0x000000bd
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_LO 0x000000be
+
+#define REG_A4XX_RBBM_PERFCTR_PC_5_HI 0x000000bf
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_LO 0x000000c0
+
+#define REG_A4XX_RBBM_PERFCTR_PC_6_HI 0x000000c1
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_LO 0x000000c2
+
+#define REG_A4XX_RBBM_PERFCTR_PC_7_HI 0x000000c3
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_LO 0x000000c4
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_0_HI 0x000000c5
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_LO 0x000000c6
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_1_HI 0x000000c7
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_LO 0x000000c8
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_2_HI 0x000000c9
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_LO 0x000000ca
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_3_HI 0x000000cb
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_LO 0x000000cc
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_4_HI 0x000000cd
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_LO 0x000000ce
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_5_HI 0x000000cf
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_LO 0x000000d0
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_6_HI 0x000000d1
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_LO 0x000000d2
+
+#define REG_A4XX_RBBM_PERFCTR_VFD_7_HI 0x000000d3
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_LO 0x000000d4
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_0_HI 0x000000d5
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_LO 0x000000d6
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_1_HI 0x000000d7
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_LO 0x000000d8
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_2_HI 0x000000d9
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_LO 0x000000da
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_3_HI 0x000000db
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_LO 0x000000dc
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_4_HI 0x000000dd
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_LO 0x000000de
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_5_HI 0x000000df
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_LO 0x000000e0
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_6_HI 0x000000e1
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_LO 0x000000e2
+
+#define REG_A4XX_RBBM_PERFCTR_HLSQ_7_HI 0x000000e3
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_LO 0x000000e4
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_0_HI 0x000000e5
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_LO 0x000000e6
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_1_HI 0x000000e7
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_LO 0x000000e8
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_2_HI 0x000000e9
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_LO 0x000000ea
+
+#define REG_A4XX_RBBM_PERFCTR_VPC_3_HI 0x000000eb
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_LO 0x000000ec
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_0_HI 0x000000ed
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_LO 0x000000ee
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_1_HI 0x000000ef
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_LO 0x000000f0
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_2_HI 0x000000f1
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_LO 0x000000f2
+
+#define REG_A4XX_RBBM_PERFCTR_CCU_3_HI 0x000000f3
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_LO 0x000000f4
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_0_HI 0x000000f5
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_LO 0x000000f6
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_1_HI 0x000000f7
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_LO 0x000000f8
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_2_HI 0x000000f9
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_LO 0x000000fa
+
+#define REG_A4XX_RBBM_PERFCTR_TSE_3_HI 0x000000fb
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_LO 0x000000fc
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_0_HI 0x000000fd
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_LO 0x000000fe
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_1_HI 0x000000ff
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_LO 0x00000100
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_2_HI 0x00000101
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_LO 0x00000102
+
+#define REG_A4XX_RBBM_PERFCTR_RAS_3_HI 0x00000103
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_LO 0x00000104
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_0_HI 0x00000105
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_LO 0x00000106
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_1_HI 0x00000107
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_LO 0x00000108
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_2_HI 0x00000109
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_LO 0x0000010a
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_3_HI 0x0000010b
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_LO 0x0000010c
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_4_HI 0x0000010d
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_LO 0x0000010e
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_5_HI 0x0000010f
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_LO 0x00000110
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_6_HI 0x00000111
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_LO 0x00000112
+
+#define REG_A4XX_RBBM_PERFCTR_UCHE_7_HI 0x00000113
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_LO 0x00000114
+
+#define REG_A4XX_RBBM_PERFCTR_TP_0_HI 0x00000115
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_LO 0x00000116
+
+#define REG_A4XX_RBBM_PERFCTR_TP_1_HI 0x00000117
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_LO 0x00000118
+
+#define REG_A4XX_RBBM_PERFCTR_TP_2_HI 0x00000119
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_LO 0x0000011a
+
+#define REG_A4XX_RBBM_PERFCTR_TP_3_HI 0x0000011b
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_LO 0x0000011c
+
+#define REG_A4XX_RBBM_PERFCTR_TP_4_HI 0x0000011d
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_LO 0x0000011e
+
+#define REG_A4XX_RBBM_PERFCTR_TP_5_HI 0x0000011f
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_LO 0x00000120
+
+#define REG_A4XX_RBBM_PERFCTR_TP_6_HI 0x00000121
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_LO 0x00000122
+
+#define REG_A4XX_RBBM_PERFCTR_TP_7_HI 0x00000123
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_LO 0x00000124
+
+#define REG_A4XX_RBBM_PERFCTR_SP_0_HI 0x00000125
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_LO 0x00000126
+
+#define REG_A4XX_RBBM_PERFCTR_SP_1_HI 0x00000127
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_LO 0x00000128
+
+#define REG_A4XX_RBBM_PERFCTR_SP_2_HI 0x00000129
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_LO 0x0000012a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_3_HI 0x0000012b
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_LO 0x0000012c
+
+#define REG_A4XX_RBBM_PERFCTR_SP_4_HI 0x0000012d
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_LO 0x0000012e
+
+#define REG_A4XX_RBBM_PERFCTR_SP_5_HI 0x0000012f
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_LO 0x00000130
+
+#define REG_A4XX_RBBM_PERFCTR_SP_6_HI 0x00000131
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_LO 0x00000132
+
+#define REG_A4XX_RBBM_PERFCTR_SP_7_HI 0x00000133
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_LO 0x00000134
+
+#define REG_A4XX_RBBM_PERFCTR_SP_8_HI 0x00000135
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_LO 0x00000136
+
+#define REG_A4XX_RBBM_PERFCTR_SP_9_HI 0x00000137
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_LO 0x00000138
+
+#define REG_A4XX_RBBM_PERFCTR_SP_10_HI 0x00000139
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_LO 0x0000013a
+
+#define REG_A4XX_RBBM_PERFCTR_SP_11_HI 0x0000013b
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_LO 0x0000013c
+
+#define REG_A4XX_RBBM_PERFCTR_RB_0_HI 0x0000013d
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_LO 0x0000013e
+
+#define REG_A4XX_RBBM_PERFCTR_RB_1_HI 0x0000013f
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_LO 0x00000140
+
+#define REG_A4XX_RBBM_PERFCTR_RB_2_HI 0x00000141
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_LO 0x00000142
+
+#define REG_A4XX_RBBM_PERFCTR_RB_3_HI 0x00000143
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_LO 0x00000144
+
+#define REG_A4XX_RBBM_PERFCTR_RB_4_HI 0x00000145
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_LO 0x00000146
+
+#define REG_A4XX_RBBM_PERFCTR_RB_5_HI 0x00000147
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_LO 0x00000148
+
+#define REG_A4XX_RBBM_PERFCTR_RB_6_HI 0x00000149
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_LO 0x0000014a
+
+#define REG_A4XX_RBBM_PERFCTR_RB_7_HI 0x0000014b
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_LO 0x0000014c
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_0_HI 0x0000014d
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_LO 0x0000014e
+
+#define REG_A4XX_RBBM_PERFCTR_VSC_1_HI 0x0000014f
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_LO 0x00000166
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_0_HI 0x00000167
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168
+
+#define REG_A4XX_RBBM_PERFCTR_PWR_1_HI 0x00000169
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_LO 0x0000016e
+
+#define REG_A4XX_RBBM_ALWAYSON_COUNTER_HI 0x0000016f
+
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP_REG(uint32_t i0) { return 0x00000068 + 0x1*i0; }
@@ -1136,6 +2056,14 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
#define REG_A4XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x00000175
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_0 0x00000176
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_1 0x00000177
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_2 0x00000178
+
+#define REG_A4XX_RBBM_PERFCTR_RBBM_SEL_3 0x00000179
+
#define REG_A4XX_RBBM_GPU_BUSY_MASKED 0x0000017a
#define REG_A4XX_RBBM_INT_0_STATUS 0x0000017d
@@ -1272,6 +2200,20 @@ static inline uint32_t REG_A4XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000240
#define REG_A4XX_CP_PERFCTR_CP_SEL_0 0x00000500
+#define REG_A4XX_CP_PERFCTR_CP_SEL_1 0x00000501
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_2 0x00000502
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_3 0x00000503
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_4 0x00000504
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_5 0x00000505
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_6 0x00000506
+
+#define REG_A4XX_CP_PERFCTR_CP_SEL_7 0x00000507
+
#define REG_A4XX_CP_PERFCOMBINER_SELECT 0x0000050b
static inline uint32_t REG_A4XX_CP_SCRATCH(uint32_t i0) { return 0x00000578 + 0x1*i0; }
@@ -1802,6 +2744,12 @@ static inline uint32_t A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A4XX_VPC_DEBUG_ECO_CONTROL 0x00000e64
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_0 0x00000e65
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_1 0x00000e66
+
+#define REG_A4XX_VPC_PERFCTR_VPC_SEL_2 0x00000e67
+
#define REG_A4XX_VPC_PERFCTR_VPC_SEL_3 0x00000e68
#define REG_A4XX_VPC_ATTR 0x00002140
@@ -1914,6 +2862,20 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0
#define REG_A4XX_VFD_DEBUG_CONTROL 0x00000e40
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_0 0x00000e43
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_1 0x00000e44
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_2 0x00000e45
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_3 0x00000e46
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_4 0x00000e47
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_5 0x00000e48
+
+#define REG_A4XX_VFD_PERFCTR_VFD_SEL_6 0x00000e49
+
#define REG_A4XX_VFD_PERFCTR_VFD_SEL_7 0x00000e4a
#define REG_A4XX_VGT_CL_INITIATOR 0x000021d0
@@ -2070,6 +3032,20 @@ static inline uint32_t A4XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
#define REG_A4XX_TPL1_TP_MODE_CONTROL 0x00000f03
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_0 0x00000f04
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_1 0x00000f05
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_2 0x00000f06
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_3 0x00000f07
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_4 0x00000f08
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_5 0x00000f09
+
+#define REG_A4XX_TPL1_PERFCTR_TP_SEL_6 0x00000f0a
+
#define REG_A4XX_TPL1_PERFCTR_TP_SEL_7 0x00000f0b
#define REG_A4XX_TPL1_TP_TEX_OFFSET 0x00002380
@@ -2124,8 +3100,20 @@ static inline uint32_t A4XX_TPL1_TP_TEX_COUNT_GS(uint32_t val)
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_0 0x00000c88
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_1 0x00000c89
+
+#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_2 0x00000c8a
+
#define REG_A4XX_GRAS_PERFCTR_TSE_SEL_3 0x00000c8b
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_0 0x00000c8c
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_1 0x00000c8d
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_2 0x00000c8e
+
+#define REG_A4XX_GRAS_PERFCTR_RAS_SEL_3 0x00000c8f
+
#define REG_A4XX_GRAS_CL_CLIP_CNTL 0x00002000
#define A4XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00008000
#define A4XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z 0x00400000
@@ -2391,6 +3379,20 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_UCHE_CACHE_WAYS_VFD 0x00000e8c
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_0 0x00000e8e
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_1 0x00000e8f
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_2 0x00000e90
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_3 0x00000e91
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_4 0x00000e92
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_5 0x00000e93
+
+#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_6 0x00000e94
+
#define REG_A4XX_UCHE_PERFCTR_UCHE_SEL_7 0x00000e95
#define REG_A4XX_HLSQ_TIMEOUT_THRESHOLD 0x00000e00
@@ -2401,6 +3403,22 @@ static inline uint32_t A4XX_GRAS_SC_EXTENT_WINDOW_TL_Y(uint32_t val)
#define REG_A4XX_HLSQ_PERF_PIPE_MASK 0x00000e0e
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 0x00000e06
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 0x00000e07
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 0x00000e08
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 0x00000e09
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 0x00000e0a
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 0x00000e0b
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 0x00000e0c
+
+#define REG_A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 0x00000e0d
+
#define REG_A4XX_HLSQ_CONTROL_0_REG 0x000023c0
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
#define A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
@@ -2655,6 +3673,18 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
#define REG_A4XX_PC_PERFCTR_PC_SEL_0 0x00000d10
+#define REG_A4XX_PC_PERFCTR_PC_SEL_1 0x00000d11
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_2 0x00000d12
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_3 0x00000d13
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_4 0x00000d14
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_5 0x00000d15
+
+#define REG_A4XX_PC_PERFCTR_PC_SEL_6 0x00000d16
+
#define REG_A4XX_PC_PERFCTR_PC_SEL_7 0x00000d17
#define REG_A4XX_PC_BIN_BASE 0x000021c0
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
index 074c5a752bf..0c1027d5804 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_context.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
@@ -49,6 +49,8 @@ struct fd4_context {
/* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We
* could combine it with another allocation.
+ *
+ * (upper area used as scratch bo.. see fd4_query)
*/
struct fd_bo *vsc_size_mem;
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index 4a3f1da30ed..72154bf286a 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -33,6 +33,7 @@
#include "util/u_format.h"
#include "freedreno_resource.h"
+#include "freedreno_query_hw.h"
#include "fd4_emit.h"
#include "fd4_blend.h"
@@ -882,6 +883,8 @@ fd4_emit_restore(struct fd_context *ctx)
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, 0x0);
+ fd_hw_query_enable(ctx, ring);
+
ctx->needs_rb_fbd = true;
}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
index 4f69e0c1694..14a809431ac 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_query.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
@@ -31,6 +31,7 @@
#include "freedreno_util.h"
#include "fd4_query.h"
+#include "fd4_context.h"
#include "fd4_draw.h"
#include "fd4_format.h"
@@ -81,7 +82,12 @@ static uint64_t
count_samples(const struct fd_rb_samp_ctrs *start,
const struct fd_rb_samp_ctrs *end)
{
- return end->ctr[0] - start->ctr[0];
+ uint64_t n = 0;
+
+ for (unsigned i = 0; i < 16; i += 4)
+ n += end->ctr[i] - start->ctr[i];
+
+ return n / 2;
}
static void
@@ -102,6 +108,127 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
result->b |= (n > 0);
}
+/*
+ * Time Elapsed Query:
+ *
+ * Note: we could in theory support timestamp queries, but they
+ * won't give sensible results for tilers.
+ */
+
+static void
+time_elapsed_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ /* Right now, the assignment of countable to counter register is
+ * just hard coded. If we start exposing more countables than we
+ * have counters, we will need to be more clever.
+ */
+ fd_wfi(ctx, ring);
+ OUT_PKT0(ring, REG_A4XX_CP_PERFCTR_CP_SEL_0, 1);
+ OUT_RING(ring, CP_ALWAYS_COUNT);
+}
+
+static struct fd_hw_sample *
+time_elapsed_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ struct fd_hw_sample *samp = fd_hw_sample_init(ctx, sizeof(uint64_t));
+
+ /* use unused part of vsc_size_mem as scratch space, to avoid
+ * extra allocation:
+ */
+ struct fd_bo *scratch_bo = fd4_context(ctx)->vsc_size_mem;
+ const int sample_off = 128;
+ const int addr_off = sample_off + 8;
+
+ debug_assert(ctx->screen->max_freq > 0);
+
+ /* Basic issue is that we need to read counter value to a relative
+ * destination (with per-tile offset) rather than absolute dest
+ * addr. But there is no pm4 packet that can do that. This is
+ * where it would be *really* nice if we could write our own fw
+ * since afaict implementing the sort of packet we need would be
+ * trivial.
+ *
+ * Instead, we:
+ * (1) CP_REG_TO_MEM to do a 64b copy of counter to scratch buffer
+ * (2) CP_MEM_WRITE to write per-sample offset to scratch buffer
+ * (3) CP_REG_TO_MEM w/ accumulate flag to add the per-tile base
+ * address to the per-sample offset in the scratch buffer
+ * (4) CP_MEM_TO_REG to copy resulting address from steps #2 and #3
+ * to CP_ME_NRT_ADDR
+ * (5) CP_MEM_TO_REG's to copy saved counter value from scratch
+ * buffer to CP_ME_NRT_DATA to trigger the write out to query
+ * result buffer
+ *
+ * Straightforward, right?
+ *
+ * Maybe could swap the order of things in the scratch buffer to
+ * put address first, and copy back to CP_ME_NRT_ADDR+DATA in one
+ * shot, but that's really just polishing a turd..
+ */
+
+ fd_wfi(ctx, ring);
+
+ /* copy sample counter _LO and _HI to scratch: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(REG_A4XX_RBBM_PERFCTR_CP_0_LO) |
+ CP_REG_TO_MEM_0_64B |
+ CP_REG_TO_MEM_0_CNT(2-1)); /* write 2 regs to mem */
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* ok... here we really *would* like to use the CP_SET_CONSTANT
+ * mode which can add a constant to value in reg2 and write to
+ * reg1... *but* that only works for banked/context registers,
+ * and CP_ME_NRT_DATA isn't one of those.. so we need to do some
+ * CP math to the scratch buffer instead:
+ *
+ * (note first 8 bytes are counter value, use offset 0x8 for
+ * address calculation)
+ */
+
+ /* per-sample offset to scratch bo: */
+ OUT_PKT3(ring, CP_MEM_WRITE, 2);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+ OUT_RING(ring, samp->offset);
+
+ /* now add to that the per-tile base: */
+ OUT_PKT3(ring, CP_REG_TO_MEM, 2);
+ OUT_RING(ring, CP_REG_TO_MEM_0_REG(HW_QUERY_BASE_REG) |
+ CP_REG_TO_MEM_0_ACCUMULATE |
+ CP_REG_TO_MEM_0_CNT(1-1)); /* readback 1 regs */
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* now copy that back to CP_ME_NRT_ADDR: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_ADDR);
+ OUT_RELOC(ring, scratch_bo, addr_off, 0, 0);
+
+ /* and finally, copy sample from scratch buffer to CP_ME_NRT_DATA
+ * to trigger the write to result buffer
+ */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off, 0, 0);
+
+ /* and again to get the value of the _HI reg from scratch: */
+ OUT_PKT3(ring, CP_MEM_TO_REG, 2);
+ OUT_RING(ring, REG_A4XX_CP_ME_NRT_DATA);
+ OUT_RELOC(ring, scratch_bo, sample_off + 0x4, 0, 0);
+
+ /* Sigh.. */
+
+ return samp;
+}
+
+static void
+time_elapsed_accumulate_result(struct fd_context *ctx,
+ const void *start, const void *end,
+ union pipe_query_result *result)
+{
+ uint64_t n = *(uint64_t *)end - *(uint64_t *)start;
+ /* max_freq is in Hz, convert cycle count to ns: */
+ result->u64 += n * 1000000000 / ctx->screen->max_freq;
+}
+
static const struct fd_hw_sample_provider occlusion_counter = {
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
.active = FD_STAGE_DRAW,
@@ -116,8 +243,17 @@ static const struct fd_hw_sample_provider occlusion_predicate = {
.accumulate_result = occlusion_predicate_accumulate_result,
};
+static const struct fd_hw_sample_provider time_elapsed = {
+ .query_type = PIPE_QUERY_TIME_ELAPSED,
+ .active = FD_STAGE_DRAW,
+ .enable = time_elapsed_enable,
+ .get_sample = time_elapsed_get_sample,
+ .accumulate_result = time_elapsed_accumulate_result,
+};
+
void fd4_query_context_init(struct pipe_context *pctx)
{
fd_hw_query_register_provider(pctx, &occlusion_counter);
fd_hw_query_register_provider(pctx, &occlusion_predicate);
+ fd_hw_query_register_provider(pctx, &time_elapsed);
}
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index f9c0e6aaa83..ac5343f1a78 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index c6741890c69..09b26a253f0 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -9,16 +9,17 @@ git clone https://github.com/freedreno/envytools.git
The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1572 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2016-02-10 21:03:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 16166 bytes, from 2016-02-11 21:20:31)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83967 bytes, from 2016-02-10 17:07:21)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 109858 bytes, from 2016-02-10 17:07:21)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
-Copyright (C) 2013-2015 by the following authors:
+Copyright (C) 2013-2016 by the following authors:
- Rob Clark <[email protected]> (robclark)
+- Ilia Mirkin <[email protected]> (imirkin)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@@ -172,6 +173,11 @@ enum adreno_pm4_type3_packets {
CP_UNKNOWN_1A = 26,
CP_UNKNOWN_4E = 78,
CP_WIDE_REG_WRITE = 116,
+ CP_SCRATCH_TO_REG = 77,
+ CP_REG_TO_SCRATCH = 74,
+ CP_WAIT_MEM_WRITES = 18,
+ CP_COND_REG_EXEC = 71,
+ CP_MEM_TO_REG = 66,
IN_IB_PREFETCH_END = 23,
IN_SUBBLK_PREFETCH = 31,
IN_INSTR_PREFETCH = 32,
@@ -503,5 +509,29 @@ static inline uint32_t CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS(uint32_t val)
return ((val) << CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__SHIFT) & CP_SET_BIN_DATA_1_BIN_SIZE_ADDRESS__MASK;
}
+#define REG_CP_REG_TO_MEM_0 0x00000000
+#define CP_REG_TO_MEM_0_REG__MASK 0x0000ffff
+#define CP_REG_TO_MEM_0_REG__SHIFT 0
+static inline uint32_t CP_REG_TO_MEM_0_REG(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_0_REG__SHIFT) & CP_REG_TO_MEM_0_REG__MASK;
+}
+#define CP_REG_TO_MEM_0_CNT__MASK 0x3ff80000
+#define CP_REG_TO_MEM_0_CNT__SHIFT 19
+static inline uint32_t CP_REG_TO_MEM_0_CNT(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_0_CNT__SHIFT) & CP_REG_TO_MEM_0_CNT__MASK;
+}
+#define CP_REG_TO_MEM_0_64B 0x40000000
+#define CP_REG_TO_MEM_0_ACCUMULATE 0x80000000
+
+#define REG_CP_REG_TO_MEM_1 0x00000001
+#define CP_REG_TO_MEM_1_DEST__MASK 0xffffffff
+#define CP_REG_TO_MEM_1_DEST__SHIFT 0
+static inline uint32_t CP_REG_TO_MEM_1_DEST(uint32_t val)
+{
+ return ((val) << CP_REG_TO_MEM_1_DEST__SHIFT) & CP_REG_TO_MEM_1_DEST__MASK;
+}
+
#endif /* ADRENO_PM4_XML */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index 9e7130ab915..85ce97c16b7 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -164,6 +164,9 @@ struct fd_context {
*/
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
+ /* which sample providers were active in the current batch: */
+ uint32_t active_providers;
+
/* tracking for current stage, to know when to start/stop
* any active queries:
*/
diff --git a/src/gallium/drivers/freedreno/freedreno_query.h b/src/gallium/drivers/freedreno/freedreno_query.h
index c2c71da2790..1e4f45ffcd3 100644
--- a/src/gallium/drivers/freedreno/freedreno_query.h
+++ b/src/gallium/drivers/freedreno/freedreno_query.h
@@ -65,4 +65,16 @@ fd_query(struct pipe_query *pq)
void fd_query_screen_init(struct pipe_screen *pscreen);
void fd_query_context_init(struct pipe_context *pctx);
+static inline bool
+skip_begin_query(int type)
+{
+ switch (type) {
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_GPU_FINISHED:
+ return true;
+ default:
+ return false;
+ }
+}
+
#endif /* FREEDRENO_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.c b/src/gallium/drivers/freedreno/freedreno_query_hw.c
index 027fdc9de23..2ac03f22b41 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.c
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.c
@@ -47,6 +47,8 @@ static int pidx(unsigned query_type)
return 0;
case PIPE_QUERY_OCCLUSION_PREDICATE:
return 1;
+ case PIPE_QUERY_TIME_ELAPSED:
+ return 2;
default:
return -1;
}
@@ -89,7 +91,9 @@ static void
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
+ int idx = pidx(hq->provider->query_type);
assert(!hq->period);
+ ctx->active_providers |= (1 << idx);
hq->period = util_slab_alloc(&ctx->sample_period_pool);
list_inithead(&hq->period->list);
hq->period->start = get_sample(ctx, ring, hq->base.type);
@@ -101,7 +105,9 @@ static void
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
struct fd_ringbuffer *ring)
{
+ int idx = pidx(hq->provider->query_type);
assert(hq->period && !hq->period->end);
+ assert(ctx->active_providers & (1 << idx));
hq->period->end = get_sample(ctx, ring, hq->base.type);
list_addtail(&hq->period->list, &hq->current_periods);
hq->period = NULL;
@@ -156,6 +162,12 @@ static void
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
{
struct fd_hw_query *hq = fd_hw_query(q);
+ /* there are a couple special cases, which don't have
+ * a matching ->begin_query():
+ */
+ if (skip_begin_query(q->type) && !q->active) {
+ fd_hw_begin_query(ctx, q);
+ }
if (!q->active)
return;
if (is_active(hq, ctx->stage))
@@ -291,6 +303,8 @@ fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
pipe_reference_init(&samp->reference, 1);
samp->size = size;
+ debug_assert(util_is_power_of_two(size));
+ ctx->next_sample_offset = align(ctx->next_sample_offset, size);
samp->offset = ctx->next_sample_offset;
/* NOTE: util_slab_alloc() does not zero out the buffer: */
samp->bo = NULL;
@@ -318,7 +332,7 @@ prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
assert(samp->tile_stride == tile_stride);
return;
}
- samp->bo = bo;
+ samp->bo = fd_bo_ref(bo);
samp->num_tiles = num_tiles;
samp->tile_stride = tile_stride;
}
@@ -431,6 +445,23 @@ fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
ctx->stage = stage;
}
+/* call the provider->enable() for all the hw queries that were active
+ * in the current batch. This sets up perfctr selector regs statically
+ * for the duration of the batch.
+ */
+void
+fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring)
+{
+ for (int idx = 0; idx < MAX_HW_SAMPLE_PROVIDERS; idx++) {
+ if (ctx->active_providers & (1 << idx)) {
+ assert(ctx->sample_providers[idx]);
+ if (ctx->sample_providers[idx]->enable)
+ ctx->sample_providers[idx]->enable(ctx, ring);
+ }
+ }
+ ctx->active_providers = 0; /* clear it for next frame */
+}
+
void
fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider)
diff --git a/src/gallium/drivers/freedreno/freedreno_query_hw.h b/src/gallium/drivers/freedreno/freedreno_query_hw.h
index 8f4b1f58ee5..8a5d114d806 100644
--- a/src/gallium/drivers/freedreno/freedreno_query_hw.h
+++ b/src/gallium/drivers/freedreno/freedreno_query_hw.h
@@ -76,6 +76,11 @@ struct fd_hw_sample_provider {
/* stages applicable to the query type: */
enum fd_render_stage active;
+ /* Optional hook for enabling a counter. Guaranteed to happen
+ * at least once before the first ->get_sample() in a batch.
+ */
+ void (*enable)(struct fd_context *ctx, struct fd_ringbuffer *ring);
+
/* when a new sample is required, emit appropriate cmdstream
* and return a sample object:
*/
@@ -144,6 +149,7 @@ void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
struct fd_ringbuffer *ring);
void fd_hw_query_set_stage(struct fd_context *ctx,
struct fd_ringbuffer *ring, enum fd_render_stage stage);
+void fd_hw_query_enable(struct fd_context *ctx, struct fd_ringbuffer *ring);
void fd_hw_query_register_provider(struct pipe_context *pctx,
const struct fd_hw_sample_provider *provider);
void fd_hw_query_init(struct pipe_context *pctx);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
index 27f4d267438..2b3ecfe664e 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -298,12 +298,14 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
return is_a3xx(screen) ? 1 : 0;
/* Queries. */
- case PIPE_CAP_QUERY_TIME_ELAPSED:
case PIPE_CAP_QUERY_TIMESTAMP:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return 0;
case PIPE_CAP_OCCLUSION_QUERY:
return is_a3xx(screen) || is_a4xx(screen);
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ /* only a4xx, requires new enough kernel so we know max_freq: */
+ return (screen->max_freq > 0) && is_a4xx(screen);
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
case PIPE_CAP_MIN_TEXEL_OFFSET:
@@ -434,9 +436,12 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_SUPPORTED_IRS:
+ return 0;
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
return 32;
case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+ case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
return 0;
}
debug_printf("unknown shader param %d\n", param);
@@ -534,6 +539,16 @@ fd_screen_create(struct fd_device *dev)
}
screen->device_id = val;
+ if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {
+ DBG("could not get gpu freq");
+ /* this limits what performance related queries are
+ * supported but is not fatal
+ */
+ screen->max_freq = 0;
+ } else {
+ screen->max_freq = val;
+ }
+
if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {
DBG("could not get gpu-id");
goto fail;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index 8fb096a10dd..a81c7786390 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -56,6 +56,7 @@ struct fd_screen {
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
uint32_t chip_id; /* coreid:8 majorrev:8 minorrev:8 patch:8 */
+ uint32_t max_freq;
uint32_t max_rts; /* max # of render targets */
void *compiler; /* currently unused for a2xx */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index ffa75775505..7a1812f2518 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1365,7 +1365,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
struct ir3_block *b = ctx->block;
struct ir3_instruction **dst, *sam, *src0[12], *src1[4];
struct ir3_instruction **coord, *lod, *compare, *proj, **off, **ddx, **ddy;
- struct ir3_instruction *const_off[4];
bool has_bias = false, has_lod = false, has_proj = false, has_off = false;
unsigned i, coords, flags;
unsigned nsrc0 = 0, nsrc1 = 0;