diff options
Diffstat (limited to 'src/gallium')
35 files changed, 8955 insertions, 3 deletions
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 64dfda62399..b6dbbd19183 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -5,12 +5,13 @@ noinst_LTLIBRARIES = libfreedreno.la AM_CFLAGS = \ -Wno-packed-bitfield-compat \ -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/drivers/freedreno/a3xx \ -I$(top_srcdir)/src/gallium/drivers/freedreno/a2xx \ $(GALLIUM_CFLAGS) \ $(FREEDRENO_CFLAGS) \ $(VISIBILITY_CFLAGS) -SUBDIRS = a2xx +SUBDIRS = a2xx a3xx libfreedreno_la_SOURCES = \ freedreno_util.c \ @@ -25,5 +26,6 @@ libfreedreno_la_SOURCES = \ freedreno_gmem.c libfreedreno_la_LIBADD = \ + a3xx/libfd3xx.la \ a2xx/libfd2xx.la diff --git a/src/gallium/drivers/freedreno/a3xx/Makefile.am b/src/gallium/drivers/freedreno/a3xx/Makefile.am new file mode 100644 index 00000000000..a7e415f7fbd --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/Makefile.am @@ -0,0 +1,27 @@ +include $(top_srcdir)/src/gallium/Automake.inc + +noinst_LTLIBRARIES = libfd3xx.la + +AM_CFLAGS = \ + -Wno-packed-bitfield-compat \ + -I$(top_srcdir)/src/gallium/drivers \ + -I$(top_srcdir)/src/gallium/drivers/freedreno \ + $(GALLIUM_CFLAGS) \ + $(FREEDRENO_CFLAGS) \ + $(VISIBILITY_CFLAGS) + +libfd3xx_la_SOURCES = \ + fd3_blend.c \ + fd3_compiler.c \ + fd3_context.c \ + fd3_draw.c \ + fd3_emit.c \ + fd3_gmem.c \ + fd3_program.c \ + fd3_rasterizer.c \ + fd3_screen.c \ + fd3_texture.c \ + fd3_util.c \ + fd3_zsa.c \ + disasm-a3xx.c \ + ir-a3xx.c diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h new file mode 100644 index 00000000000..c7f5085d032 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -0,0 +1,1838 @@ +#ifndef A3XX_XML +#define A3XX_XML + +/* Autogenerated file, DO NOT EDIT manually! + +This file was generated by the rules-ng-ng headergen tool in this git repository: +http://0x04.net/cgit/index.cgi/rules-ng-ng +git clone git://0x04.net/rules-ng-ng + +The rules-ng-ng source files this header was generated from are: +- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) +- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) +- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) +- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) + +Copyright (C) 2013 by the following authors: +- Rob Clark <[email protected]> (robclark) + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial +portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +enum a3xx_render_mode { + RB_RENDERING_PASS = 0, + RB_TILING_PASS = 1, + RB_RESOLVE_PASS = 2, +}; + +enum a3xx_tile_mode { + LINEAR = 0, + TILE_32X32 = 2, +}; + +enum a3xx_threadmode { + MULTI = 0, + SINGLE = 1, +}; + +enum a3xx_instrbuffermode { + BUFFER = 1, +}; + +enum a3xx_threadsize { + TWO_QUADS = 0, + FOUR_QUADS = 1, +}; + +enum a3xx_state_block_id { + HLSQ_BLOCK_ID_TP_TEX = 2, + HLSQ_BLOCK_ID_TP_MIPMAP = 3, + HLSQ_BLOCK_ID_SP_VS = 4, + HLSQ_BLOCK_ID_SP_FS = 6, +}; + +enum a3xx_cache_opcode { + INVALIDATE = 1, +}; + +enum a3xx_vtx_fmt { + VFMT_FLOAT_32 = 0, + VFMT_FLOAT_32_32 = 1, + VFMT_FLOAT_32_32_32 = 2, + VFMT_FLOAT_32_32_32_32 = 3, + VFMT_FLOAT_16 = 4, + VFMT_FLOAT_16_16 = 5, + VFMT_FLOAT_16_16_16 = 6, + VFMT_FLOAT_16_16_16_16 = 7, + VFMT_FIXED_32 = 8, + VFMT_FIXED_32_32 = 9, + VFMT_FIXED_32_32_32 = 10, + VFMT_FIXED_32_32_32_32 = 11, + VFMT_SHORT_16 = 16, + VFMT_SHORT_16_16 = 17, + VFMT_SHORT_16_16_16 = 18, + VFMT_SHORT_16_16_16_16 = 19, + VFMT_USHORT_16 = 20, + VFMT_USHORT_16_16 = 21, + VFMT_USHORT_16_16_16 = 22, + VFMT_USHORT_16_16_16_16 = 23, + VFMT_NORM_SHORT_16 = 24, + VFMT_NORM_SHORT_16_16 = 25, + VFMT_NORM_SHORT_16_16_16 = 26, + VFMT_NORM_SHORT_16_16_16_16 = 27, + VFMT_NORM_USHORT_16 = 28, + VFMT_NORM_USHORT_16_16 = 29, + VFMT_NORM_USHORT_16_16_16 = 30, + VFMT_NORM_USHORT_16_16_16_16 = 31, + VFMT_UBYTE_8 = 40, + VFMT_UBYTE_8_8 = 41, + VFMT_UBYTE_8_8_8 = 42, + VFMT_UBYTE_8_8_8_8 = 43, + VFMT_NORM_UBYTE_8 = 44, + VFMT_NORM_UBYTE_8_8 = 45, + VFMT_NORM_UBYTE_8_8_8 = 46, + VFMT_NORM_UBYTE_8_8_8_8 = 47, + VFMT_BYTE_8 = 48, + VFMT_BYTE_8_8 = 49, + VFMT_BYTE_8_8_8 = 50, + VFMT_BYTE_8_8_8_8 = 51, + VFMT_NORM_BYTE_8 = 52, + VFMT_NORM_BYTE_8_8 = 53, + VFMT_NORM_BYTE_8_8_8 = 54, + VFMT_NORM_BYTE_8_8_8_8 = 55, + VFMT_UINT_10_10_10_2 = 60, + VFMT_NORM_UINT_10_10_10_2 = 61, + VFMT_INT_10_10_10_2 = 62, + VFMT_NORM_INT_10_10_10_2 = 63, +}; + +enum a3xx_tex_fmt { + TFMT_NORM_USHORT_565 = 4, + TFMT_NORM_USHORT_5551 = 6, + TFMT_NORM_USHORT_4444 = 7, + TFMT_NORM_UINT_X8Z24 = 10, + TFMT_NORM_UINT_2_10_10_10 = 41, + TFMT_NORM_UINT_A8 = 44, + TFMT_NORM_UINT_L8_A8 = 47, + TFMT_NORM_UINT_8 = 48, + TFMT_NORM_UINT_8_8 = 49, + TFMT_NORM_UINT_8_8_8 = 50, + TFMT_NORM_UINT_8_8_8_8 = 51, + TFMT_FLOAT_16 = 64, + TFMT_FLOAT_16_16 = 65, + TFMT_FLOAT_16_16_16_16 = 67, + TFMT_FLOAT_32 = 84, + TFMT_FLOAT_32_32 = 85, + TFMT_FLOAT_32_32_32_32 = 87, +}; + +enum a3xx_tex_fetchsize { + TFETCH_DISABLE = 0, + TFETCH_1_BYTE = 1, + TFETCH_2_BYTE = 2, + TFETCH_4_BYTE = 3, + TFETCH_8_BYTE = 4, + TFETCH_16_BYTE = 5, +}; + +enum a3xx_color_fmt { + RB_R8G8B8_UNORM = 4, + RB_R8G8B8A8_UNORM = 8, + RB_Z16_UNORM = 12, + RB_A8_UNORM = 20, +}; + +enum a3xx_color_swap { + WZYX = 0, + WXYZ = 1, + ZYXW = 2, + XYZW = 3, +}; + +enum a3xx_msaa_samples { + MSAA_ONE = 0, + MSAA_TWO = 1, + MSAA_FOUR = 2, +}; + +enum a3xx_sp_perfcounter_select { + SP_FS_CFLOW_INSTRUCTIONS = 12, + SP_FS_FULL_ALU_INSTRUCTIONS = 14, + SP0_ICL1_MISSES = 26, + SP_ALU_ACTIVE_CYCLES = 29, +}; + +enum adreno_rb_copy_control_mode { + RB_COPY_RESOLVE = 1, + RB_COPY_DEPTH_STENCIL = 5, +}; + +enum a3xx_tex_filter { + A3XX_TEX_NEAREST = 0, + A3XX_TEX_LINEAR = 1, +}; + +enum a3xx_tex_clamp { + A3XX_TEX_REPEAT = 0, + A3XX_TEX_CLAMP_TO_EDGE = 1, + A3XX_TEX_MIRROR_REPEAT = 2, + A3XX_TEX_CLAMP_NONE = 3, +}; + +enum a3xx_tex_swiz { + A3XX_TEX_X = 0, + A3XX_TEX_Y = 1, + A3XX_TEX_Z = 2, + A3XX_TEX_W = 3, + A3XX_TEX_ZERO = 4, + A3XX_TEX_ONE = 5, +}; + +#define REG_A3XX_RBBM_HW_VERSION 0x00000000 + +#define REG_A3XX_RBBM_HW_RELEASE 0x00000001 + +#define REG_A3XX_RBBM_HW_CONFIGURATION 0x00000002 + +#define REG_A3XX_RBBM_CLOCK_CTL 0x00000010 + +#define REG_A3XX_RBBM_SP_HYST_CNT 0x00000012 + +#define REG_A3XX_RBBM_SW_RESET_CMD 0x00000018 + +#define REG_A3XX_RBBM_AHB_CTL0 0x00000020 + +#define REG_A3XX_RBBM_AHB_CTL1 0x00000021 + +#define REG_A3XX_RBBM_AHB_CMD 0x00000022 + +#define REG_A3XX_RBBM_AHB_ERROR_STATUS 0x00000027 + +#define REG_A3XX_RBBM_GPR0_CTL 0x0000002e + +#define REG_A3XX_RBBM_STATUS 0x00000030 + +#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x00000033 + +#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x00000050 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0 0x00000051 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1 0x00000054 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2 0x00000057 + +#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3 0x0000005a + +#define REG_A3XX_RBBM_INT_CLEAR_CMD 0x00000061 + +#define REG_A3XX_RBBM_INT_0_MASK 0x00000063 + +#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064 + +#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080 + +#define REG_A3XX_RBBM_GPU_BUSY_MASKED 0x00000088 + +#define REG_A3XX_RBBM_PERFCTR_SP_7_LO 0x000000e0 + +#define REG_A3XX_RBBM_PERFCTR_SP_7_HI 0x000000e1 + +#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO 0x000000ec + +#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI 0x000000ed + +#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 + +#define REG_A3XX_RBBM_RBBM_CTL 0x00000100 + +#define REG_A3XX_RBBM_DEBUG_BUS_CTL 0x00000111 + +#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x00000112 + +#define REG_A3XX_CP_PFP_UCODE_ADDR 0x000001c9 + +#define REG_A3XX_CP_PFP_UCODE_DATA 0x000001ca + +#define REG_A3XX_CP_ROQ_ADDR 0x000001cc + +#define REG_A3XX_CP_ROQ_DATA 0x000001cd + +#define REG_A3XX_CP_MERCIU_ADDR 0x000001d1 + +#define REG_A3XX_CP_MERCIU_DATA 0x000001d2 + +#define REG_A3XX_CP_MERCIU_DATA2 0x000001d3 + +#define REG_A3XX_CP_MEQ_ADDR 0x000001da + +#define REG_A3XX_CP_MEQ_DATA 0x000001db + +#define REG_A3XX_CP_HW_FAULT 0x0000045c + +#define REG_A3XX_CP_PROTECT_CTRL 0x0000045e + +#define REG_A3XX_CP_PROTECT_STATUS 0x0000045f + +#define REG_A3XX_CP_PROTECT(i0) (0x00000460 + 0x1*(i0)) + +#define REG_A3XX_CP_PROTECT_REG(i0) (0x00000460 + 0x1*(i0)) + +#define REG_A3XX_CP_AHB_FAULT 0x0000054d + +#define REG_A3XX_CP_SCRATCH_REG2 0x0000057a + +#define REG_A3XX_CP_SCRATCH_REG3 0x0000057b + +#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040 +#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000 +#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000 +#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE 0x00020000 +#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE 0x00080000 +#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE 0x00100000 +#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE 0x00200000 + +#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ 0x00002044 +#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK 0x000003ff +#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK; +} +#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK 0x000ffc00 +#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT 10 +static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) +{ + return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_XOFFSET 0x00002048 +#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_XSCALE 0x00002049 +#define A3XX_GRAS_CL_VPORT_XSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_YOFFSET 0x0000204a +#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_YSCALE 0x0000204b +#define A3XX_GRAS_CL_VPORT_YSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET 0x0000204c +#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK; +} + +#define REG_A3XX_GRAS_CL_VPORT_ZSCALE 0x0000204d +#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK 0xffffffff +#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT 0 +static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) +{ + return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK; +} + +#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068 + +#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069 + +#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff +#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) +{ + return ((((uint32_t)(val * 40.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK; +} + +#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK 0xffffffff +#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0 +static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) +{ + return ((((uint32_t)(val * 44.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK; +} + +#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070 +#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT 0x00000001 +#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK 0x00000002 +#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK 0x000007fc +#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 2 +static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(uint32_t val) +{ + return ((val) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK; +} +#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800 + +#define REG_A3XX_GRAS_SC_CONTROL 0x00002072 +#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK 0x000000f0 +#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT 4 +static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK; +} +#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK 0x00000f00 +#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT 8 +static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK; +} +#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK 0x0000f000 +#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT 12 +static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK; +} + +#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x00002074 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK 0x00007fff +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK; +} +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x00002075 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK 0x00007fff +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK; +} +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x00002079 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK 0x00007fff +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK; +} +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK; +} + +#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x0000207a +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE 0x80000000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK 0x00007fff +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT 0 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK; +} +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK 0x7fff0000 +#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT 16 +static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) +{ + return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK; +} + +#define REG_A3XX_RB_MODE_CONTROL 0x000020c0 +#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS 0x00000080 +#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK 0x00000700 +#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT 8 +static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val) +{ + return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK; +} +#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE 0x00008000 +#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE 0x00010000 + +#define REG_A3XX_RB_RENDER_CONTROL 0x000020c1 +#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK 0x00000ff0 +#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT 4 +static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) +{ + return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK; +} +#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE 0x00001000 +#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM 0x00002000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK 0x07000000 +#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT 24 +static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK; +} + +#define REG_A3XX_RB_MSAA_CONTROL 0x000020c2 +#define A3XX_RB_MSAA_CONTROL_DISABLE 0x00000400 +#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK 0x0000f000 +#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT 12 +static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK; +} +#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK 0xffff0000 +#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT 16 +static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) +{ + return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK; +} + +#define REG_A3XX_UNKNOWN_20C3 0x000020c3 + +#define REG_A3XX_RB_MRT(i0) (0x000020c4 + 0x4*(i0)) + +#define REG_A3XX_RB_MRT_CONTROL(i0) (0x000020c4 + 0x4*(i0)) +#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE 0x00000008 +#define A3XX_RB_MRT_CONTROL_BLEND 0x00000010 +#define A3XX_RB_MRT_CONTROL_BLEND2 0x00000020 +#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK 0x00000f00 +#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT 8 +static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(uint32_t val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK; +} +#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK 0x00003000 +#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT 12 +static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK; +} +#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK 0x0f000000 +#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT 24 +static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +} + +#define REG_A3XX_RB_MRT_BUF_INFO(i0) (0x000020c5 + 0x4*(i0)) +#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK 0x0000003f +#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT 0 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK 0x000000c0 +#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT 6 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK 0x00000c00 +#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT 10 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK; +} +#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK 0xfffe0000 +#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT 17 +static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) +{ + return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK; +} + +#define REG_A3XX_RB_MRT_BUF_BASE(i0) (0x000020c6 + 0x4*(i0)) +#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK 0xfffffff0 +#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT 4 +static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) +{ + return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK; +} + +#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0) (0x000020c7 + 0x4*(i0)) +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK 0x0000001f +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT 0 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK 0x000000e0 +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT 5 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum adreno_rb_blend_opcode val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK 0x00001f00 +#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT 8 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK 0x001f0000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT 16 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK 0x00e00000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT 21 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum adreno_rb_blend_opcode val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK 0x1f000000 +#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT 24 +static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) +{ + return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK; +} +#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE 0x20000000 + +#define REG_A3XX_RB_BLEND_RED 0x000020e4 + +#define REG_A3XX_RB_BLEND_GREEN 0x000020e5 + +#define REG_A3XX_RB_BLEND_BLUE 0x000020e6 + +#define REG_A3XX_RB_BLEND_ALPHA 0x000020e7 + +#define REG_A3XX_UNKNOWN_20E8 0x000020e8 + +#define REG_A3XX_UNKNOWN_20E9 0x000020e9 + +#define REG_A3XX_UNKNOWN_20EA 0x000020ea + +#define REG_A3XX_UNKNOWN_20EB 0x000020eb + +#define REG_A3XX_RB_COPY_CONTROL 0x000020ec +#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK 0x00000003 +#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val) +{ + return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK; +} +#define A3XX_RB_COPY_CONTROL_MODE__MASK 0x00000070 +#define A3XX_RB_COPY_CONTROL_MODE__SHIFT 4 +static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val) +{ + return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK; +} +#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xfffffc00 +#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 10 +static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val) +{ + return ((val >> 10) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_BASE 0x000020ed +#define A3XX_RB_COPY_DEST_BASE_BASE__MASK 0xfffffff0 +#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT 4 +static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val) +{ + return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_PITCH 0x000020ee +#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK 0xffffffff +#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val) +{ + return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK; +} + +#define REG_A3XX_RB_COPY_DEST_INFO 0x000020ef +#define A3XX_RB_COPY_DEST_INFO_TILE__MASK 0x00000003 +#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT 0 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK 0x000000fc +#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT 2 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK 0x00000300 +#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT 8 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000 +#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK; +} +#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK 0x001c0000 +#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT 18 +static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val) +{ + return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK; +} + +#define REG_A3XX_RB_DEPTH_CONTROL 0x00002100 +#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE 0x00000002 +#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE 0x00000004 +#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_ENABLE 0x00000008 +#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK 0x00000070 +#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT 4 +static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; +} +#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080 +#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 + +#define REG_A3XX_UNKNOWN_2101 0x00002101 + +#define REG_A3XX_RB_DEPTH_INFO 0x00002102 +#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK 0x00000001 +#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT 0 +static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val) +{ + return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK; +} +#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK 0xfffff800 +#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT 11 +static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val) +{ + return ((val >> 10) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK; +} + +#define REG_A3XX_RB_DEPTH_PITCH 0x00002103 +#define A3XX_RB_DEPTH_PITCH__MASK 0xffffffff +#define A3XX_RB_DEPTH_PITCH__SHIFT 0 +static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val) +{ + return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK; +} + +#define REG_A3XX_RB_STENCIL_CONTROL 0x00002104 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE 0x00000001 +#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF 0x00000004 +#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK 0x00000700 +#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT 8 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK 0x00003800 +#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT 11 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK 0x0001c000 +#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT 14 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK 0x000e0000 +#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT 17 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK 0x00700000 +#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT 20 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK 0x03800000 +#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT 23 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK 0x1c000000 +#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT 26 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK; +} +#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK 0xe0000000 +#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT 29 +static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val) +{ + return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK; +} + +#define REG_A3XX_UNKNOWN_2105 0x00002105 + +#define REG_A3XX_UNKNOWN_2106 0x00002106 + +#define REG_A3XX_UNKNOWN_2107 0x00002107 + +#define REG_A3XX_RB_STENCILREFMASK 0x00002108 +#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK 0x000000ff +#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT 0 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK; +} +#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK 0x0000ff00 +#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT 8 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK; +} +#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK 0x00ff0000 +#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK; +} + +#define REG_A3XX_RB_STENCILREFMASK_BF 0x00002109 +#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK 0x000000ff +#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT 0 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK; +} +#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK 0x0000ff00 +#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT 8 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK; +} +#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK 0x00ff0000 +#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT 16 +static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val) +{ + return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK; +} + +#define REG_A3XX_PA_SC_WINDOW_OFFSET 0x0000210e +#define A3XX_PA_SC_WINDOW_OFFSET_X__MASK 0x0000ffff +#define A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT 0 +static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_X(uint32_t val) +{ + return ((val) << A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A3XX_PA_SC_WINDOW_OFFSET_X__MASK; +} +#define A3XX_PA_SC_WINDOW_OFFSET_Y__MASK 0xffff0000 +#define A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT 16 +static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_Y(uint32_t val) +{ + return ((val) << A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A3XX_PA_SC_WINDOW_OFFSET_Y__MASK; +} + +#define REG_A3XX_PC_VSTREAM_CONTROL 0x000021e4 + +#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL 0x000021ea + +#define REG_A3XX_PC_PRIM_VTX_CNTL 0x000021ec +#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK 0x0000001f +#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT 0 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK 0x000000e0 +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT 5 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK 0x00000700 +#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT 8 +static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val) +{ + return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK; +} +#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000 + +#define REG_A3XX_PC_RESTART_INDEX 0x000021ed + +#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010 +#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4 +static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK; +} +#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040 +#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200 +#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400 +#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000 +#define A3XX_HLSQ_CONTROL_0_REG_CONSTSWITCHMODE 0x08000000 +#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000 +#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000 +#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000 +#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000 + +#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040 +#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6 +static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK; +} +#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100 +#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200 + +#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202 +#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000 +#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26 +static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK; +} + +#define REG_A3XX_HLSQ_CONTROL_3_REG 0x00002203 + +#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000 +#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK; +} +#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK; +} +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000 +#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK; +} +#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK 0xff000000 +#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT 24 +static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val) +{ + return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK; +} + +#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK; +} +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000 +#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK; +} + +#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0 +static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK; +} +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000 +#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16 +static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val) +{ + return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK; +} + +#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a + +#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG 0x0000220b + +#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG 0x0000220c + +#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211 + +#define REG_A3XX_HLSQ_CL_CONTROL_1_REG 0x00002212 + +#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214 + +#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x00002215 + +#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x00002217 + +#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG 0x0000221a + +#define REG_A3XX_VFD_CONTROL_0 0x00002240 +#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK 0x0003ffff +#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT 0 +static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK; +} +#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK 0x003c0000 +#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT 18 +static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK; +} +#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK 0x07c00000 +#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT 22 +static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK; +} +#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK 0xf8000000 +#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT 27 +static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK; +} + +#define REG_A3XX_VFD_CONTROL_1 0x00002241 +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK 0x0000ffff +#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT 0 +static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK; +} +#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK 0x00ff0000 +#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT 16 +static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK; +} +#define A3XX_VFD_CONTROL_1_REGID4INST__MASK 0xff000000 +#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT 24 +static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val) +{ + return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK; +} + +#define REG_A3XX_VFD_INDEX_MIN 0x00002242 + +#define REG_A3XX_VFD_INDEX_MAX 0x00002243 + +#define REG_A3XX_VFD_INSTANCEID_OFFSET 0x00002244 + +#define REG_A3XX_VFD_INDEX_OFFSET 0x00002245 + +#define REG_A3XX_VFD_FETCH(i0) (0x00002246 + 0x2*(i0)) + +#define REG_A3XX_VFD_FETCH_INSTR_0(i0) (0x00002246 + 0x2*(i0)) +#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK 0x0000007f +#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT 0 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK 0x0001ff80 +#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT 7 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT 0x00020000 +#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK 0x00fc0000 +#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT 18 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK; +} +#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK 0xff000000 +#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT 24 +static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val) +{ + return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK; +} + +#define REG_A3XX_VFD_FETCH_INSTR_1(i0) (0x00002247 + 0x2*(i0)) + +#define REG_A3XX_VFD_DECODE(i0) (0x00002266 + 0x1*(i0)) + +#define REG_A3XX_VFD_DECODE_INSTR(i0) (0x00002266 + 0x1*(i0)) +#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK 0x0000000f +#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT 0 +static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK; +} +#define A3XX_VFD_DECODE_INSTR_CONSTFILL 0x00000010 +#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK 0x00000fc0 +#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT 6 +static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A3XX_VFD_DECODE_INSTR_FORMAT__MASK; +} +#define A3XX_VFD_DECODE_INSTR_REGID__MASK 0x000ff000 +#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT 12 +static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK; +} +#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK 0x1f000000 +#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT 24 +static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK; +} +#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID 0x20000000 +#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT 0x40000000 + +#define REG_A3XX_VFD_VS_THREADING_THRESHOLD 0x0000227e +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK 0x0000000f +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0 +static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val) +{ + return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK; +} +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK 0x0000ff00 +#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT 8 +static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val) +{ + return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK; +} + +#define REG_A3XX_VPC_ATTR 0x00002280 +#define A3XX_VPC_ATTR_TOTALATTR__MASK 0x00000fff +#define A3XX_VPC_ATTR_TOTALATTR__SHIFT 0 +static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK; +} +#define A3XX_VPC_ATTR_THRDASSIGN__MASK 0x0ffff000 +#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT 12 +static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK; +} +#define A3XX_VPC_ATTR_LMSIZE__MASK 0xf0000000 +#define A3XX_VPC_ATTR_LMSIZE__SHIFT 28 +static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val) +{ + return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK; +} + +#define REG_A3XX_VPC_PACK 0x00002281 +#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK 0x0000ff00 +#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT 8 +static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val) +{ + return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK; +} +#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK 0x00ff0000 +#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT 16 +static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val) +{ + return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK; +} + +#define REG_A3XX_VPC_VARYING_INTERP(i0) (0x00002282 + 0x1*(i0)) + +#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0) (0x00002282 + 0x1*(i0)) + +#define REG_A3XX_VPC_VARYING_PS_REPL(i0) (0x00002286 + 0x1*(i0)) + +#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0) (0x00002286 + 0x1*(i0)) + +#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0 0x0000228a + +#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1 0x0000228b + +#define REG_A3XX_SP_SP_CTRL_REG 0x000022c0 +#define A3XX_SP_SP_CTRL_REG_RESOLVE 0x00010000 +#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK 0x000c0000 +#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT 18 +static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK; +} +#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK 0x00300000 +#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT 20 +static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK; +} +#define A3XX_SP_SP_CTRL_REG_LOMODE__MASK 0x00c00000 +#define A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT 22 +static inline uint32_t A3XX_SP_SP_CTRL_REG_LOMODE(uint32_t val) +{ + return ((val) << A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_LOMODE__MASK; +} + +#define REG_A3XX_SP_VS_CTRL_REG0 0x000022c4 +#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 +#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00 +#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 +#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK; +} +#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000 +#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000 +#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24 +static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK; +} + +#define REG_A3XX_SP_VS_CTRL_REG1 0x000022c5 +#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff +#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 +#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK; +} +#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x3f000000 +#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24 +static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} + +#define REG_A3XX_SP_VS_PARAM_REG 0x000022c6 +#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK 0x000000ff +#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT 0 +static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK; +} +#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK 0x0000ff00 +#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT 8 +static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK; +} +#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000 +#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20 +static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val) +{ + return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK; +} + +#define REG_A3XX_SP_VS_OUT(i0) (0x000022c7 + 0x1*(i0)) + +#define REG_A3XX_SP_VS_OUT_REG(i0) (0x000022c7 + 0x1*(i0)) +#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff +#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0 +static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK; +} +#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00 +#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9 +static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK; +} +#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000 +#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16 +static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK; +} +#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000 +#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25 +static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK; +} + +#define REG_A3XX_SP_VS_VPC_DST(i0) (0x000022d0 + 0x1*(i0)) + +#define REG_A3XX_SP_VS_VPC_DST_REG(i0) (0x000022d0 + 0x1*(i0)) +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK; +} +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000 +#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24 +static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val) +{ + return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK; +} + +#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4 +#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5 + +#define REG_A3XX_SP_VS_PVT_MEM_CTRL_REG 0x000022d6 + +#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7 + +#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8 + +#define REG_A3XX_SP_VS_LENGTH_REG 0x000022df +#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff +#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK; +} + +#define REG_A3XX_SP_FS_CTRL_REG0 0x000022e0 +#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK 0x00000001 +#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT 0 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK 0x00000002 +#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT 1 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004 +#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0 +#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00 +#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000 +#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000 +#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK; +} +#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000 +#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000 +#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000 +#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24 +static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK; +} + +#define REG_A3XX_SP_FS_CTRL_REG1 0x000022e1 +#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK 0x000003ff +#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK 0x000ffc00 +#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT 10 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x00f00000 +#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 20 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK; +} +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x3f000000 +#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24 +static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK; +} + +#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2 +#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000 +#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK; +} +#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK 0xfe000000 +#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT 25 +static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK; +} + +#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3 + +#define REG_A3XX_SP_FS_PVT_MEM_CTRL_REG 0x000022e4 + +#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5 + +#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6 + +#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x000022e8 + +#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x000022e9 + +#define REG_A3XX_SP_FS_OUTPUT_REG 0x000022ec + +#define REG_A3XX_SP_FS_MRT(i0) (0x000022f0 + 0x1*(i0)) + +#define REG_A3XX_SP_FS_MRT_REG(i0) (0x000022f0 + 0x1*(i0)) +#define A3XX_SP_FS_MRT_REG_REGID__MASK 0x000000ff +#define A3XX_SP_FS_MRT_REG_REGID__SHIFT 0 +static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val) +{ + return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK; +} +#define A3XX_SP_FS_MRT_REG_HALF_PRECISION 0x00000100 + +#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0) (0x000022f4 + 0x1*(i0)) + +#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0) (0x000022f4 + 0x1*(i0)) +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK 0x0000003f +#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT 0 +static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val) +{ + return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK; +} + +#define REG_A3XX_SP_FS_LENGTH_REG 0x000022ff +#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK 0xffffffff +#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT 0 +static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val) +{ + return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK; +} + +#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET 0x00002340 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff +#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK; +} +#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK; +} +#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 +#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 +static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK; +} + +#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR 0x00002341 + +#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET 0x00002342 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK 0x000000ff +#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT 0 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK; +} +#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK 0x0000ff00 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT 8 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK; +} +#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK 0xffff0000 +#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT 16 +static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val) +{ + return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK; +} + +#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x00002343 + +#define REG_A3XX_VBIF_CLKON 0x00003001 + +#define REG_A3XX_VBIF_FIXED_SORT_EN 0x0000300c + +#define REG_A3XX_VBIF_FIXED_SORT_SEL0 0x0000300d + +#define REG_A3XX_VBIF_FIXED_SORT_SEL1 0x0000300e + +#define REG_A3XX_VBIF_ABIT_SORT 0x0000301c + +#define REG_A3XX_VBIF_ABIT_SORT_CONF 0x0000301d + +#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN 0x0000302a + +#define REG_A3XX_VBIF_IN_RD_LIM_CONF0 0x0000302c + +#define REG_A3XX_VBIF_IN_RD_LIM_CONF1 0x0000302d + +#define REG_A3XX_VBIF_IN_WR_LIM_CONF0 0x00003030 + +#define REG_A3XX_VBIF_IN_WR_LIM_CONF1 0x00003031 + +#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0 0x00003034 + +#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0 0x00003035 + +#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST 0x00003036 + +#define REG_A3XX_VBIF_ARB_CTL 0x0000303c + +#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x00003049 + +#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x00003058 + +#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN 0x0000305e + +#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f + +#define REG_A3XX_VSC_BIN_SIZE 0x00000c01 +#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f +#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val) +{ + return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK; +} +#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK 0x000003e0 +#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT 5 +static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val) +{ + return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK; +} + +#define REG_A3XX_VSC_SIZE_ADDRESS 0x00000c02 + +#define REG_A3XX_VSC_PIPE(i0) (0x00000c06 + 0x3*(i0)) + +#define REG_A3XX_VSC_PIPE_CONFIG(i0) (0x00000c06 + 0x3*(i0)) +#define A3XX_VSC_PIPE_CONFIG_X__MASK 0x000003ff +#define A3XX_VSC_PIPE_CONFIG_X__SHIFT 0 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_Y__MASK 0x000ffc00 +#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT 10 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & A3XX_VSC_PIPE_CONFIG_Y__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_W__MASK 0x00f00000 +#define A3XX_VSC_PIPE_CONFIG_W__SHIFT 20 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK; +} +#define A3XX_VSC_PIPE_CONFIG_H__MASK 0x0f000000 +#define A3XX_VSC_PIPE_CONFIG_H__SHIFT 24 +static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val) +{ + return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK; +} + +#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0) (0x00000c07 + 0x3*(i0)) + +#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0) (0x00000c08 + 0x3*(i0)) + +#define REG_A3XX_UNKNOWN_0C3D 0x00000c3d + +#define REG_A3XX_UNKNOWN_0C81 0x00000c81 + +#define REG_A3XX_GRAS_CL_USER_PLANE(i0) (0x00000ca0 + 0x4*(i0)) + +#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0) (0x00000ca0 + 0x4*(i0)) + +#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0) (0x00000ca1 + 0x4*(i0)) + +#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0) (0x00000ca2 + 0x4*(i0)) + +#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0) (0x00000ca3 + 0x4*(i0)) + +#define REG_A3XX_RB_GMEM_BASE_ADDR 0x00000cc0 + +#define REG_A3XX_RB_WINDOW_SIZE 0x00000ce0 +#define A3XX_RB_WINDOW_SIZE_WIDTH__MASK 0x00003fff +#define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT 0 +static inline uint32_t A3XX_RB_WINDOW_SIZE_WIDTH(uint32_t val) +{ + return ((val) << A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT) & A3XX_RB_WINDOW_SIZE_WIDTH__MASK; +} +#define A3XX_RB_WINDOW_SIZE_HEIGHT__MASK 0x0fffc000 +#define A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT 14 +static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val) +{ + return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK; +} + +#define REG_A3XX_UNKNOWN_0E00 0x00000e00 + +#define REG_A3XX_UNKNOWN_0E43 0x00000e43 + +#define REG_A3XX_VFD_PERFCOUNTER0_SELECT 0x00000e44 + +#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL 0x00000e61 + +#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ 0x00000e62 + +#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG 0x00000e82 + +#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG 0x00000ea0 +#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK 0x0fffffff +#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT 0 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK; +} + +#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG 0x00000ea1 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK 0x0fffffff +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT 0 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK; +} +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK 0x30000000 +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT 28 +static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val) +{ + return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK; +} +#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE 0x80000000 + +#define REG_A3XX_SP_PERFCOUNTER0_SELECT 0x00000ec4 + +#define REG_A3XX_SP_PERFCOUNTER1_SELECT 0x00000ec5 + +#define REG_A3XX_SP_PERFCOUNTER2_SELECT 0x00000ec6 + +#define REG_A3XX_SP_PERFCOUNTER3_SELECT 0x00000ec7 + +#define REG_A3XX_SP_PERFCOUNTER4_SELECT 0x00000ec8 + +#define REG_A3XX_SP_PERFCOUNTER5_SELECT 0x00000ec9 + +#define REG_A3XX_SP_PERFCOUNTER6_SELECT 0x00000eca + +#define REG_A3XX_SP_PERFCOUNTER7_SELECT 0x00000ecb + +#define REG_A3XX_UNKNOWN_0EE0 0x00000ee0 + +#define REG_A3XX_UNKNOWN_0F03 0x00000f03 + +#define REG_A3XX_TEX_SAMP_0 0x00000000 +#define A3XX_TEX_SAMP_0_XY_MAG__MASK 0x0000000c +#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT 2 +static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val) +{ + return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK; +} +#define A3XX_TEX_SAMP_0_XY_MIN__MASK 0x00000030 +#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT 4 +static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val) +{ + return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_S__MASK 0x000001c0 +#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT 6 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_T__MASK 0x00000e00 +#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT 9 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK; +} +#define A3XX_TEX_SAMP_0_WRAP_R__MASK 0x00007000 +#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT 12 +static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val) +{ + return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK; +} +#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000 + +#define REG_A3XX_TEX_SAMP_1 0x00000001 + +#define REG_A3XX_TEX_CONST_0 0x00000000 +#define A3XX_TEX_CONST_0_TILED 0x00000001 +#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070 +#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_Y__MASK 0x00000380 +#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT 7 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_Z__MASK 0x00001c00 +#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT 10 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK; +} +#define A3XX_TEX_CONST_0_SWIZ_W__MASK 0x0000e000 +#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT 13 +static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val) +{ + return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK; +} +#define A3XX_TEX_CONST_0_FMT__MASK 0x1fc00000 +#define A3XX_TEX_CONST_0_FMT__SHIFT 22 +static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val) +{ + return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK; +} + +#define REG_A3XX_TEX_CONST_1 0x00000001 +#define A3XX_TEX_CONST_1_HEIGHT__MASK 0x00003fff +#define A3XX_TEX_CONST_1_HEIGHT__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK; +} +#define A3XX_TEX_CONST_1_WIDTH__MASK 0x0fffc000 +#define A3XX_TEX_CONST_1_WIDTH__SHIFT 14 +static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK; +} +#define A3XX_TEX_CONST_1_FETCHSIZE__MASK 0xf0000000 +#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT 28 +static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val) +{ + return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK; +} + +#define REG_A3XX_TEX_CONST_2 0x00000002 +#define A3XX_TEX_CONST_2_INDX__MASK 0x000000ff +#define A3XX_TEX_CONST_2_INDX__SHIFT 0 +static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK; +} +#define A3XX_TEX_CONST_2_PITCH__MASK 0x3ffff000 +#define A3XX_TEX_CONST_2_PITCH__SHIFT 12 +static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val) +{ + return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK; +} +#define A3XX_TEX_CONST_2_SWAP__MASK 0xc0000000 +#define A3XX_TEX_CONST_2_SWAP__SHIFT 30 +static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val) +{ + return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK; +} + +#define REG_A3XX_TEX_CONST_3 0x00000003 + + +#endif /* A3XX_XML */ diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c new file mode 100644 index 00000000000..4db095f5679 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c @@ -0,0 +1,946 @@ +/* + * Copyright (c) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <assert.h> + +#include <util/u_debug.h> + +#include "disasm.h" +#include "instr-a3xx.h" + +static enum debug_t debug; + +#define printf debug_printf + +static const char *levels[] = { + "", + "\t", + "\t\t", + "\t\t\t", + "\t\t\t\t", + "\t\t\t\t\t", + "\t\t\t\t\t\t", + "\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t", + "\t\t\t\t\t\t\t\t\t", + "x", + "x", + "x", + "x", + "x", + "x", +}; + +static const char *component = "xyzw"; + +static const char *type[] = { + [TYPE_F16] = "f16", + [TYPE_F32] = "f32", + [TYPE_U16] = "u16", + [TYPE_U32] = "u32", + [TYPE_S16] = "s16", + [TYPE_S32] = "s32", + [TYPE_U8] = "u8", + [TYPE_S8] = "s8", +}; + +static void print_reg(reg_t reg, bool full, bool r, bool c, bool im, + bool neg, bool abs, bool addr_rel) +{ + const char type = c ? 'c' : 'r'; + + // XXX I prefer - and || for neg/abs, but preserving format used + // by libllvm-a3xx for easy diffing.. + + if (abs && neg) + printf("(absneg)"); + else if (neg) + printf("(neg)"); + else if (abs) + printf("(abs)"); + + if (r) + printf("(r)"); + + if (im) { + printf("%d", reg.iim_val); + } else if (addr_rel) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + if (reg.iim_val < 0) + printf("%s%c<a0.x - %d>", full ? "" : "h", type, -reg.iim_val); + else if (reg.iim_val > 0) + printf("%s%c<a0.x + %d>", full ? "" : "h", type, reg.iim_val); + else + printf("%s%c<a0.x>", full ? "" : "h", type); + } else if ((reg.num == REG_A0) && !c) { + printf("a0.%c", component[reg.comp]); + } else if ((reg.num == REG_P0) && !c) { + printf("p0.%c", component[reg.comp]); + } else { + printf("%s%c%d.%c", full ? "" : "h", type, reg.num, component[reg.comp]); + } +} + +/* Tracking for registers used, read-before-write (input), and + * write-after-read (output.. but not 100%).. + */ + +#define MAX_REG 128 + +typedef struct { + uint8_t full[MAX_REG/8]; + uint8_t half[MAX_REG/8]; +} regmask_t; + +static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val) +{ + unsigned i = num / 8; + unsigned j = num % 8; + assert(num < MAX_REG); + if (full) { + regmask->full[i] = (regmask->full[i] & ~(1 << j)) | (val << j); + } else { + regmask->half[i] = (regmask->half[i] & ~(1 << j)) | (val << j); + } +} + +static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full) +{ + unsigned i = num / 8; + unsigned j = num % 8; + assert(num < MAX_REG); + if (full) { + return (regmask->full[i] >> j) & 0x1; + } else { + return (regmask->half[i] >> j) & 0x1; + } +} + +static unsigned regidx(reg_t reg) +{ + return (4 * reg.num) + reg.comp; +} + +static struct { + regmask_t used; + regmask_t rbw; /* read before write */ + regmask_t war; /* write after read */ + regmask_t cnst; /* used consts */ +} regs; + +static void print_regs(regmask_t *regmask, bool full) +{ + int num, max = 0, cnt = 0; + int first, last; + + void print_sequence(void) + { + if (first != MAX_REG) { + if (first == last) { + printf(" %d", first); + } else { + printf(" %d-%d", first, last); + } + } + } + + first = last = MAX_REG; + + for (num = 0; num < MAX_REG; num++) { + if (regmask_get(regmask, num, full)) { + if (num != (last + 1)) { + print_sequence(); + first = num; + } + last = num; + max = num; + cnt++; + } + } + + print_sequence(); + + printf(" (cnt=%d, max=%d)", cnt, max); +} + +static void print_reg_stats(int level) +{ + printf("%sRegister Stats:\n", levels[level]); + printf("%s- used (half):", levels[level]); + print_regs(®s.used, false); + printf("\n"); + printf("%s- used (full):", levels[level]); + print_regs(®s.used, true); + printf("\n"); + printf("%s- input (half):", levels[level]); + print_regs(®s.rbw, false); + printf("\n"); + printf("%s- input (full):", levels[level]); + print_regs(®s.rbw, true); + printf("\n"); + printf("%s- const (half):", levels[level]); + print_regs(®s.cnst, false); + printf("\n"); + printf("%s- const (full):", levels[level]); + print_regs(®s.cnst, true); + printf("\n"); + printf("%s- output (half):", levels[level]); + print_regs(®s.war, false); + printf(" (estimated)\n"); + printf("%s- output (full):", levels[level]); + print_regs(®s.war, true); + printf(" (estimated)\n"); +} + +/* we have to process the dst register after src to avoid tripping up + * the read-before-write detection + */ +static unsigned last_dst; +static bool last_dst_full; +static bool last_dst_valid = false; + +/* current instruction repeat flag: */ +static unsigned repeat; + +static void process_reg_dst(void) +{ + int i; + + if (!last_dst_valid) + return; + + for (i = 0; i <= repeat; i++) { + unsigned dst = last_dst + i; + + regmask_set(®s.war, dst, last_dst_full, 1); + regmask_set(®s.used, dst, last_dst_full, 1); + } + + last_dst_valid = false; +} + +static void print_reg_dst(reg_t reg, bool full, bool addr_rel) +{ + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || reg_special(reg))) { + last_dst = regidx(reg); + last_dst_full = full; + last_dst_valid = true; + } + print_reg(reg, full, false, false, false, false, false, addr_rel); +} + +static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im, + bool neg, bool abs, bool addr_rel) +{ + /* presumably the special registers a0.c and p0.c don't count.. */ + if (!(addr_rel || c || im || reg_special(reg))) { + int i, num = regidx(reg); + for (i = 0; i <= repeat; i++) { + unsigned src = num + i; + + if (!regmask_get(®s.used, src, full)) + regmask_set(®s.rbw, src, full, 1); + + regmask_set(®s.war, src, full, 0); + regmask_set(®s.used, src, full, 1); + + if (!r) + break; + } + } else if (c) { + int i, num = regidx(reg); + for (i = 0; i <= repeat; i++) { + unsigned src = num + i; + + regmask_set(®s.cnst, src, full, 1); + + if (!r) + break; + } + } + + print_reg(reg, full, r, c, im, neg, abs, addr_rel); +} + + +static void print_instr_cat0(instr_t *instr) +{ + instr_cat0_t *cat0 = &instr->cat0; + + switch (cat0->opc) { + case OPC_KILL: + printf(" %sp0.%c", cat0->inv ? "!" : "", + component[cat0->comp]); + break; + case OPC_BR: + printf(" %sp0.%c, #%d", cat0->inv ? "!" : "", + component[cat0->comp], cat0->immed); + break; + case OPC_JUMP: + case OPC_CALL: + printf(" #%d", cat0->immed); + break; + } + + if ((debug & PRINT_VERBOSE) && (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4)) + printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4); +} + +static void print_instr_cat1(instr_t *instr) +{ + instr_cat1_t *cat1 = &instr->cat1; + + // XXX maybe a bug in libllvm disassembler? + if (cat1->src_rel) + printf("(ul)"); + + if (cat1->src_type == cat1->dst_type) { + if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) { + /* special case (nmemonic?): */ + printf("mova"); + } else { + printf("mov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + } else { + printf("cov.%s%s", type[cat1->src_type], type[cat1->dst_type]); + } + + printf(" "); + + if (cat1->even) + printf("(even)"); + + if (cat1->pos_inf) + printf("(pos_infinity)"); + + print_reg_dst((reg_t)(cat1->dst), type_size(cat1->dst_type) == 32, + cat1->dst_rel); + + printf(", "); + + /* ugg, have to special case this.. vs print_reg().. */ + if (cat1->src_im) { + if (type_float(cat1->src_type)) + printf("(%f)", cat1->fim_val); + else + printf("%d", cat1->iim_val); + } else if (cat1->src_rel && !cat1->src_c) { + /* I would just use %+d but trying to make it diff'able with + * libllvm-a3xx... + */ + if (cat1->off < 0) + printf("c<a0.x - %d>", -cat1->off); + else if (cat1->off > 0) + printf("c<a0.x + %d>", cat1->off); + else + printf("c<a0.x>"); + } else { + print_reg_src((reg_t)(cat1->src), type_size(cat1->src_type) == 32, + cat1->src_r, cat1->src_c, cat1->src_im, false, false, false); + } + + if ((debug & PRINT_VERBOSE) && (cat1->must_be_0)) + printf("\t{1: %x}", cat1->must_be_0); +} + +static void print_instr_cat2(instr_t *instr) +{ + instr_cat2_t *cat2 = &instr->cat2; + static const char *cond[] = { + "lt", + "le", + "gt", + "ge", + "eq", + "ne", + "?6?", + }; + + switch (cat2->opc) { + case OPC_CMPS_F: + case OPC_CMPS_U: + case OPC_CMPS_S: + case OPC_CMPV_F: + case OPC_CMPV_U: + case OPC_CMPV_S: + printf(".%s", cond[cat2->cond]); + break; + } + + printf(" "); + if (cat2->ei) + printf("(ei)"); + print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false); + printf(", "); + print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r, + cat2->src1_c, cat2->src1_im, cat2->src1_neg, + cat2->src1_abs, cat2->src1_rel); + switch (cat2->opc) { + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + case OPC_CLZ_B: + case OPC_CLZ_S: + case OPC_SIGN_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_NOT_B: + case OPC_BFREV_B: + case OPC_SETRM: + case OPC_CBITS_B: + /* these only have one src reg */ + break; + default: + printf(", "); + print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r, + cat2->src2_c, cat2->src2_im, cat2->src2_neg, + cat2->src2_abs, cat2->src2_rel); + break; + } +} + +static void print_instr_cat3(instr_t *instr) +{ + instr_cat3_t *cat3 = &instr->cat3; + bool full = true; + + // XXX is this based on opc or some other bit? + switch (cat3->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + full = false; + break; + } + + printf(" "); + print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false); + printf(", "); + print_reg_src((reg_t)(cat3->src1), full, + cat3->src1_r, cat3->src1_c, false, cat3->src1_neg, + false, cat3->src1_rel); + printf(", "); + print_reg_src((reg_t)cat3->src2, full, + cat3->src2_r, cat3->src2_c, false, cat3->src2_neg, + false, false); + printf(", "); + print_reg_src((reg_t)(cat3->src3), full, + cat3->src3_r, cat3->src3_c, false, cat3->src3_neg, + false, cat3->src3_rel); +} + +static void print_instr_cat4(instr_t *instr) +{ + instr_cat4_t *cat4 = &instr->cat4; + + printf(" "); + print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false); + printf(", "); + print_reg_src((reg_t)(cat4->src), cat4->full, + cat4->src_r, cat4->src_c, cat4->src_im, + cat4->src_neg, cat4->src_abs, cat4->src_rel); + + if ((debug & PRINT_VERBOSE) && (cat4->dummy1|cat4->dummy2)) + printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2); +} + +static void print_instr_cat5(instr_t *instr) +{ + static const struct { + bool src1, src2, samp, tex; + } info[0x1f] = { + [OPC_ISAM] = { true, false, true, true, }, + [OPC_ISAML] = { true, true, true, true, }, + [OPC_ISAMM] = { true, false, true, true, }, + [OPC_SAM] = { true, false, true, true, }, + [OPC_SAMB] = { true, true, true, true, }, + [OPC_SAML] = { true, true, true, true, }, + [OPC_SAMGQ] = { true, false, true, true, }, + [OPC_GETLOD] = { true, false, true, true, }, + [OPC_CONV] = { true, true, true, true, }, + [OPC_CONVM] = { true, true, true, true, }, + [OPC_GETSIZE] = { true, false, false, true, }, + [OPC_GETBUF] = { false, false, false, true, }, + [OPC_GETPOS] = { true, false, false, true, }, + [OPC_GETINFO] = { false, false, false, true, }, + [OPC_DSX] = { true, false, false, false, }, + [OPC_DSY] = { true, false, false, false, }, + [OPC_GATHER4R] = { true, false, true, true, }, + [OPC_GATHER4G] = { true, false, true, true, }, + [OPC_GATHER4B] = { true, false, true, true, }, + [OPC_GATHER4A] = { true, false, true, true, }, + [OPC_SAMGP0] = { true, false, true, true, }, + [OPC_SAMGP1] = { true, false, true, true, }, + [OPC_SAMGP2] = { true, false, true, true, }, + [OPC_SAMGP3] = { true, false, true, true, }, + [OPC_DSXPP_1] = { true, false, false, false, }, + [OPC_DSYPP_1] = { true, false, false, false, }, + [OPC_RGETPOS] = { false, false, false, false, }, + [OPC_RGETINFO] = { false, false, false, false, }, + }; + instr_cat5_t *cat5 = &instr->cat5; + int i; + + if (cat5->is_3d) printf(".3d"); + if (cat5->is_a) printf(".a"); + if (cat5->is_o) printf(".o"); + if (cat5->is_p) printf(".p"); + if (cat5->is_s) printf(".s"); + if (cat5->is_s2en) printf(".s2en"); + + printf(" "); + + switch (cat5->opc) { + case OPC_DSXPP_1: + case OPC_DSYPP_1: + break; + default: + printf("(%s)", type[cat5->type]); + break; + } + + printf("("); + for (i = 0; i < 4; i++) + if (cat5->wrmask & (1 << i)) + printf("%c", "xyzw"[i]); + printf(")"); + + print_reg_dst((reg_t)(cat5->dst), type_size(cat5->type) == 32, false); + + if (info[cat5->opc].src1) { + printf(", "); + print_reg_src((reg_t)(cat5->src1), cat5->full, false, false, false, + false, false, false); + } + + if (cat5->is_s2en) { + printf(", "); + print_reg_src((reg_t)(cat5->s2en.src2), cat5->full, false, false, false, + false, false, false); + printf(", "); + print_reg_src((reg_t)(cat5->s2en.src3), false, false, false, false, + false, false, false); + } else { + if (cat5->is_o || info[cat5->opc].src2) { + printf(", "); + print_reg_src((reg_t)(cat5->norm.src2), cat5->full, + false, false, false, false, false, false); + } + if (info[cat5->opc].samp) + printf(", s#%d", cat5->norm.samp); + if (info[cat5->opc].tex) + printf(", t#%d", cat5->norm.tex); + } + + if (debug & PRINT_VERBOSE) { + if (cat5->is_s2en) { + if ((debug & PRINT_VERBOSE) && (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)) + printf("\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2); + } else { + if ((debug & PRINT_VERBOSE) && (cat5->norm.dummy1|cat5->dummy2)) + printf("\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2); + } + } +} + +static int32_t u2i(uint32_t val, int nbits) +{ + return ((val >> (nbits-1)) * ~((1 << nbits) - 1)) | val; +} + +static void print_instr_cat6(instr_t *instr) +{ + instr_cat6_t *cat6 = &instr->cat6; + + printf(".%s ", type[cat6->type]); + + switch (cat6->opc) { + case OPC_LDG: + case OPC_LDP: + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + /* load instructions: */ + print_reg_dst((reg_t)(cat6->a.dst), type_size(cat6->type) == 32, false); + printf(","); + switch (cat6->opc) { + case OPC_LDG: + printf("g"); + break; + case OPC_LDP: + printf("p"); + break; + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + printf("l"); + break; + } + printf("["); + print_reg_src((reg_t)(cat6->a.src), true, + false, false, false, false, false, false); + if (cat6->a.off) + printf("%+d", cat6->a.off); + printf("]"); + break; + case OPC_PREFETCH: + /* similar to load instructions: */ + printf("g["); + print_reg_src((reg_t)(cat6->a.src), true, + false, false, false, false, false, false); + if (cat6->a.off) + printf("%+d", cat6->a.off); + printf("]"); + break; + case OPC_STG: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + /* store instructions: */ + switch (cat6->opc) { + case OPC_STG: + printf("g"); + break; + case OPC_STP: + printf("p"); + break; + case OPC_STL: + case OPC_STLW: + printf("l"); + break; + } + printf("["); + print_reg_dst((reg_t)(cat6->b.dst), true, false); + if (cat6->b.off || cat6->b.off_hi) + printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13)); + printf("]"); + printf(","); + print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32, + false, false, false, false, false, false); + + break; + case OPC_STI: + /* sti has same encoding as other store instructions, but + * slightly different syntax: + */ + print_reg_dst((reg_t)(cat6->b.dst), false /* XXX is it always half? */, false); + if (cat6->b.off || cat6->b.off_hi) + printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13)); + printf(","); + print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32, + false, false, false, false, false, false); + break; + } + + printf(", %d", cat6->iim_val); + + if (debug & PRINT_VERBOSE) { + switch (cat6->opc) { + case OPC_LDG: + case OPC_LDP: + /* load instructions: */ + if (cat6->a.dummy1|cat6->a.dummy2|cat6->a.dummy3) + printf("\t{6: %x,%x,%x}", cat6->a.dummy1, cat6->a.dummy2, cat6->a.dummy3); + if ((cat6->a.must_be_one1 != 1) || (cat6->a.must_be_one2 != 1)) + printf("{?? %d,%d ??}", cat6->a.must_be_one1, cat6->a.must_be_one2); + break; + case OPC_STG: + case OPC_STP: + case OPC_STI: + /* store instructions: */ + if (cat6->b.dummy1|cat6->b.dummy2) + printf("\t{6: %x,%x}", cat6->b.dummy1, cat6->b.dummy2); + if ((cat6->b.must_be_one1 != 1) || (cat6->b.must_be_one2 != 1) || + (cat6->b.must_be_zero1 != 0)) + printf("{?? %d,%d,%d ??}", cat6->b.must_be_one1, cat6->b.must_be_one2, + cat6->b.must_be_zero1); + break; + } + } +} + +/* size of largest OPC field of all the instruction categories: */ +#define NOPC_BITS 6 + +struct opc_info { + uint16_t cat; + uint16_t opc; + const char *name; + void (*print)(instr_t *instr); +} opcs[1 << (3+NOPC_BITS)] = { +#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat } + /* category 0: */ + OPC(0, OPC_NOP, nop), + OPC(0, OPC_BR, br), + OPC(0, OPC_JUMP, jump), + OPC(0, OPC_CALL, call), + OPC(0, OPC_RET, ret), + OPC(0, OPC_KILL, kill), + OPC(0, OPC_END, end), + OPC(0, OPC_EMIT, emit), + OPC(0, OPC_CUT, cut), + OPC(0, OPC_CHMASK, chmask), + OPC(0, OPC_CHSH, chsh), + OPC(0, OPC_FLOW_REV, flow_rev), + + /* category 1: */ + OPC(1, 0, ), + + /* category 2: */ + OPC(2, OPC_ADD_F, add.f), + OPC(2, OPC_MIN_F, min.f), + OPC(2, OPC_MAX_F, max.f), + OPC(2, OPC_MUL_F, mul.f), + OPC(2, OPC_SIGN_F, sign.f), + OPC(2, OPC_CMPS_F, cmps.f), + OPC(2, OPC_ABSNEG_F, absneg.f), + OPC(2, OPC_CMPV_F, cmpv.f), + OPC(2, OPC_FLOOR_F, floor.f), + OPC(2, OPC_CEIL_F, ceil.f), + OPC(2, OPC_RNDNE_F, rndne.f), + OPC(2, OPC_RNDAZ_F, rndaz.f), + OPC(2, OPC_TRUNC_F, trunc.f), + OPC(2, OPC_ADD_U, add.u), + OPC(2, OPC_ADD_S, add.s), + OPC(2, OPC_SUB_U, sub.u), + OPC(2, OPC_SUB_S, sub.s), + OPC(2, OPC_CMPS_U, cmps.u), + OPC(2, OPC_CMPS_S, cmps.s), + OPC(2, OPC_MIN_U, min.u), + OPC(2, OPC_MIN_S, min.s), + OPC(2, OPC_MAX_U, max.u), + OPC(2, OPC_MAX_S, max.s), + OPC(2, OPC_ABSNEG_S, absneg.s), + OPC(2, OPC_AND_B, and.b), + OPC(2, OPC_OR_B, or.b), + OPC(2, OPC_NOT_B, not.b), + OPC(2, OPC_XOR_B, xor.b), + OPC(2, OPC_CMPV_U, cmpv.u), + OPC(2, OPC_CMPV_S, cmpv.s), + OPC(2, OPC_MUL_U, mul.u), + OPC(2, OPC_MUL_S, mul.s), + OPC(2, OPC_MULL_U, mull.u), + OPC(2, OPC_BFREV_B, bfrev.b), + OPC(2, OPC_CLZ_S, clz.s), + OPC(2, OPC_CLZ_B, clz.b), + OPC(2, OPC_SHL_B, shl.b), + OPC(2, OPC_SHR_B, shr.b), + OPC(2, OPC_ASHR_B, ashr.b), + OPC(2, OPC_BARY_F, bary.f), + OPC(2, OPC_MGEN_B, mgen.b), + OPC(2, OPC_GETBIT_B, getbit.b), + OPC(2, OPC_SETRM, setrm), + OPC(2, OPC_CBITS_B, cbits.b), + OPC(2, OPC_SHB, shb), + OPC(2, OPC_MSAD, msad), + + /* category 3: */ + OPC(3, OPC_MAD_U16, mad.u16), + OPC(3, OPC_MADSH_U16, madsh.u16), + OPC(3, OPC_MAD_S16, mad.s16), + OPC(3, OPC_MADSH_M16, madsh.m16), + OPC(3, OPC_MAD_U24, mad.u24), + OPC(3, OPC_MAD_S24, mad.s24), + OPC(3, OPC_MAD_F16, mad.f16), + OPC(3, OPC_MAD_F32, mad.f32), + OPC(3, OPC_SEL_B16, sel.b16), + OPC(3, OPC_SEL_B32, sel.b32), + OPC(3, OPC_SEL_S16, sel.s16), + OPC(3, OPC_SEL_S32, sel.s32), + OPC(3, OPC_SEL_F16, sel.f16), + OPC(3, OPC_SEL_F32, sel.f32), + OPC(3, OPC_SAD_S16, sad.s16), + OPC(3, OPC_SAD_S32, sad.s32), + + /* category 4: */ + OPC(4, OPC_RCP, rcp), + OPC(4, OPC_RSQ, rsq), + OPC(4, OPC_LOG2, log2), + OPC(4, OPC_EXP2, exp2), + OPC(4, OPC_SIN, sin), + OPC(4, OPC_COS, cos), + OPC(4, OPC_SQRT, sqrt), + + /* category 5: */ + OPC(5, OPC_ISAM, isam), + OPC(5, OPC_ISAML, isaml), + OPC(5, OPC_ISAMM, isamm), + OPC(5, OPC_SAM, sam), + OPC(5, OPC_SAMB, samb), + OPC(5, OPC_SAML, saml), + OPC(5, OPC_SAMGQ, samgq), + OPC(5, OPC_GETLOD, getlod), + OPC(5, OPC_CONV, conv), + OPC(5, OPC_CONVM, convm), + OPC(5, OPC_GETSIZE, getsize), + OPC(5, OPC_GETBUF, getbuf), + OPC(5, OPC_GETPOS, getpos), + OPC(5, OPC_GETINFO, getinfo), + OPC(5, OPC_DSX, dsx), + OPC(5, OPC_DSY, dsy), + OPC(5, OPC_GATHER4R, gather4r), + OPC(5, OPC_GATHER4G, gather4g), + OPC(5, OPC_GATHER4B, gather4b), + OPC(5, OPC_GATHER4A, gather4a), + OPC(5, OPC_SAMGP0, samgp0), + OPC(5, OPC_SAMGP1, samgp1), + OPC(5, OPC_SAMGP2, samgp2), + OPC(5, OPC_SAMGP3, samgp3), + OPC(5, OPC_DSXPP_1, dsxpp.1), + OPC(5, OPC_DSYPP_1, dsypp.1), + OPC(5, OPC_RGETPOS, rgetpos), + OPC(5, OPC_RGETINFO, rgetinfo), + + + /* category 6: */ + OPC(6, OPC_LDG, ldg), + OPC(6, OPC_LDL, ldl), + OPC(6, OPC_LDP, ldp), + OPC(6, OPC_STG, stg), + OPC(6, OPC_STL, stl), + OPC(6, OPC_STP, stp), + OPC(6, OPC_STI, sti), + OPC(6, OPC_G2L, g2l), + OPC(6, OPC_L2G, l2g), + OPC(6, OPC_PREFETCH, prefetch), + OPC(6, OPC_LDLW, ldlw), + OPC(6, OPC_STLW, stlw), + OPC(6, OPC_RESFMT, resfmt), + OPC(6, OPC_RESINFO, resinf), + OPC(6, OPC_ATOMIC_ADD_L, atomic.add.l), + OPC(6, OPC_ATOMIC_SUB_L, atomic.sub.l), + OPC(6, OPC_ATOMIC_XCHG_L, atomic.xchg.l), + OPC(6, OPC_ATOMIC_INC_L, atomic.inc.l), + OPC(6, OPC_ATOMIC_DEC_L, atomic.dec.l), + OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l), + OPC(6, OPC_ATOMIC_MIN_L, atomic.min.l), + OPC(6, OPC_ATOMIC_MAX_L, atomic.max.l), + OPC(6, OPC_ATOMIC_AND_L, atomic.and.l), + OPC(6, OPC_ATOMIC_OR_L, atomic.or.l), + OPC(6, OPC_ATOMIC_XOR_L, atomic.xor.l), + OPC(6, OPC_LDGB_TYPED_4D, ldgb.typed.4d), + OPC(6, OPC_STGB_4D_4, stgb.4d.4), + OPC(6, OPC_STIB, stib), + OPC(6, OPC_LDC_4, ldc.4), + OPC(6, OPC_LDLV, ldlv), + + +#undef OPC +}; + +#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)])) + +static uint32_t getopc(instr_t *instr) +{ + switch (instr->opc_cat) { + case 0: return instr->cat0.opc; + case 1: return 0; + case 2: return instr->cat2.opc; + case 3: return instr->cat3.opc; + case 4: return instr->cat4.opc; + case 5: return instr->cat5.opc; + case 6: return instr->cat6.opc; + default: return 0; + } +} + +static void print_instr(uint32_t *dwords, int level, int n) +{ + instr_t *instr = (instr_t *)dwords; + uint32_t opc = getopc(instr); + const char *name; + + printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]); + +#if 0 + /* print unknown bits: */ + if (debug & PRINT_RAW) + printf("[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000); + + if (debug & PRINT_VERBOSE) + printf("%d,%02d ", instr->opc_cat, opc); +#endif + + /* NOTE: order flags are printed is a bit fugly.. but for now I + * try to match the order in llvm-a3xx disassembler for easy + * diff'ing.. + */ + + if (instr->sync) + printf("(sy)"); + if (instr->ss && (instr->opc_cat <= 4)) + printf("(ss)"); + if (instr->jmp_tgt) + printf("(jp)"); + if (instr->repeat && (instr->opc_cat <= 4)) { + printf("(rpt%d)", instr->repeat); + repeat = instr->repeat; + } else { + repeat = 0; + } + if (instr->ul && ((2 <= instr->opc_cat) && (instr->opc_cat <= 4))) + printf("(ul)"); + + name = GETINFO(instr)->name; + + if (name) { + printf("%s", name); + GETINFO(instr)->print(instr); + } else { + printf("unknown(%d,%d)", instr->opc_cat, opc); + } + + printf("\n"); + + process_reg_dst(); +} + +int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type) +{ + int i; + + assert((sizedwords % 2) == 0); + + memset(®s, 0, sizeof(regs)); + + for (i = 0; i < sizedwords; i += 2) + print_instr(&dwords[i], level, i/2); + + print_reg_stats(level); + + return 0; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c new file mode 100644 index 00000000000..395228d4589 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c @@ -0,0 +1,87 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd3_blend.h" +#include "fd3_context.h" +#include "fd3_util.h" + +void * +fd3_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct fd3_blend_stateobj *so; + int i; + + if (cso->logicop_enable) { + DBG("Unsupported! logicop"); + return NULL; + } + + if (cso->independent_blend_enable) { + DBG("Unsupported! independent blend state"); + return NULL; + } + + so = CALLOC_STRUCT(fd3_blend_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { + const struct pipe_rt_blend_state *rt = &cso->rt[i]; + + so->rb_mrt[i].blend_control = + A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | + A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(fd_blend_func(rt->rgb_func)) | + A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(fd_blend_func(rt->alpha_func)) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)) | + A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE; + + so->rb_mrt[i].control = + A3XX_RB_MRT_CONTROL_ROP_CODE(12) | + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); + + if (rt->blend_enable) + so->rb_mrt[i].control |= + A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE | + A3XX_RB_MRT_CONTROL_BLEND | + A3XX_RB_MRT_CONTROL_BLEND2; + + if (cso->dither) + so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS); + } + + return so; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h new file mode 100644 index 00000000000..d269d74dd74 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h @@ -0,0 +1,52 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_BLEND_H_ +#define FD3_BLEND_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +struct fd3_blend_stateobj { + struct pipe_blend_state base; + struct { + uint32_t blend_control; + uint32_t control; + } rb_mrt[4]; +}; + +static INLINE struct fd3_blend_stateobj * +fd3_blend_stateobj(struct pipe_blend_state *blend) +{ + return (struct fd3_blend_stateobj *)blend; +} + +void * fd3_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso); + +#endif /* FD3_BLEND_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c new file mode 100644 index 00000000000..d844cc0f164 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -0,0 +1,1240 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include <stdarg.h> + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_ureg.h" +#include "tgsi/tgsi_info.h" +#include "tgsi/tgsi_strings.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" + +#include "fd3_compiler.h" +#include "fd3_program.h" +#include "fd3_util.h" + +#include "instr-a3xx.h" +#include "ir-a3xx.h" + +/* ************************************************************************* */ +/* split the out or find some helper to use.. like main/bitset.h.. */ + +#define MAX_REG 256 + +typedef uint8_t regmask_t[2 * MAX_REG / 8]; + +static unsigned regmask_idx(struct ir3_register *reg) +{ + unsigned num = reg->num; + assert(num < MAX_REG); + if (reg->flags & IR3_REG_HALF) + num += MAX_REG; + return num; +} + +static void regmask_set(regmask_t regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + regmask[idx / 8] |= 1 << (idx % 8); +} + +static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg) +{ + unsigned idx = regmask_idx(reg); + return regmask[idx / 8] & (1 << (idx % 8)); +} + +/* ************************************************************************* */ + +struct fd3_compile_context { + const struct tgsi_token *tokens; + struct ir3_shader *ir; + struct fd3_shader_stateobj *so; + + struct tgsi_parse_context parser; + unsigned type; + + struct tgsi_shader_info info; + + /* last input dst (for setting (ei) flag): */ + struct ir3_register *last_input; + + unsigned next_inloc; + unsigned num_internal_temps; + + /* track registers which need to synchronize w/ "complex alu" cat3 + * instruction pipeline: + */ + regmask_t needs_ss; + + /* track registers which need to synchronize with texture fetch + * pipeline: + */ + regmask_t needs_sy; + + /* inputs start at r0, temporaries start after last input, and + * outputs start after last temporary. + * + * We could be more clever, because this is not a hw restriction, + * but probably best just to implement an optimizing pass to + * reduce the # of registers used and get rid of redundant mov's + * (to output register). + */ + unsigned base_reg[TGSI_FILE_COUNT]; + + /* idx/slot for last compiler generated immediate */ + unsigned immediate_idx; + + /* stack of branch instructions that start (potentially nested) + * branch instructions, so that we can fix up the branch targets + * so that we can fix up the branch target on the corresponding + * END instruction + */ + struct ir3_instruction *branch[16]; + unsigned int branch_count; + + /* used when dst is same as one of the src, to avoid overwriting a + * src element before the remaining scalar instructions that make + * up the vector operation + */ + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; +}; + +static unsigned +compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so, + const struct tgsi_token *tokens) +{ + unsigned ret; + + ctx->tokens = tokens; + ctx->ir = so->ir; + ctx->so = so; + ctx->last_input = NULL; + ctx->next_inloc = 8; + ctx->num_internal_temps = 0; + ctx->branch_count = 0; + + memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); + memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); + memset(ctx->base_reg, 0, sizeof(ctx->base_reg)); + + tgsi_scan_shader(tokens, &ctx->info); + + /* Immediates go after constants: */ + ctx->base_reg[TGSI_FILE_CONSTANT] = 0; + ctx->base_reg[TGSI_FILE_IMMEDIATE] = + ctx->info.file_count[TGSI_FILE_CONSTANT]; + + /* Temporaries after outputs after inputs: */ + ctx->base_reg[TGSI_FILE_INPUT] = 0; + ctx->base_reg[TGSI_FILE_OUTPUT] = + ctx->info.file_count[TGSI_FILE_INPUT]; + ctx->base_reg[TGSI_FILE_TEMPORARY] = + ctx->info.file_count[TGSI_FILE_INPUT] + + ctx->info.file_count[TGSI_FILE_OUTPUT]; + + so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE]; + ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] + + ctx->info.file_count[TGSI_FILE_IMMEDIATE]); + + ret = tgsi_parse_init(&ctx->parser, tokens); + if (ret != TGSI_PARSE_OK) + return ret; + + ctx->type = ctx->parser.FullHeader.Processor.Processor; + + return ret; +} + +static void +compile_free(struct fd3_compile_context *ctx) +{ + tgsi_parse_free(&ctx->parser); +} + +struct instr_translater { + void (*fxn)(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst); + opc_t opc; + opc_t hopc; /* opc to use for half_precision mode, if different */ + unsigned arg; +}; + +static struct ir3_register * +add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + const struct tgsi_dst_register *dst, unsigned chan) +{ + unsigned flags = 0, num = 0; + + switch (dst->File) { + case TGSI_FILE_OUTPUT: + case TGSI_FILE_TEMPORARY: + num = dst->Index + ctx->base_reg[dst->File]; + break; + default: + DBG("unsupported dst register file: %s", + tgsi_file_name(dst->File)); + assert(0); + break; + } + + if (ctx->so->half_precision) + flags |= IR3_REG_HALF; + + return ir3_reg_create(instr, regid(num, chan), flags); +} + +static struct ir3_register * +add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + const struct tgsi_src_register *src, unsigned chan) +{ + unsigned flags = 0, num = 0; + struct ir3_register *reg; + + switch (src->File) { + case TGSI_FILE_IMMEDIATE: + /* TODO if possible, use actual immediate instead of const.. but + * TGSI has vec4 immediates, we can only embed scalar (of limited + * size, depending on instruction..) + */ + case TGSI_FILE_CONSTANT: + flags |= IR3_REG_CONST; + num = src->Index + ctx->base_reg[src->File]; + break; + case TGSI_FILE_INPUT: + case TGSI_FILE_TEMPORARY: + num = src->Index + ctx->base_reg[src->File]; + break; + default: + DBG("unsupported src register file: %s", + tgsi_file_name(src->File)); + assert(0); + break; + } + + if (src->Absolute) + flags |= IR3_REG_ABS; + if (src->Negate) + flags |= IR3_REG_NEGATE; + if (ctx->so->half_precision) + flags |= IR3_REG_HALF; + + reg = ir3_reg_create(instr, regid(num, chan), flags); + + if (regmask_get(ctx->needs_ss, reg)) { + instr->flags |= IR3_INSTR_SS; + memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss)); + } + + if (regmask_get(ctx->needs_sy, reg)) { + instr->flags |= IR3_INSTR_SY; + memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy)); + } + + return reg; +} + +static void +src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst) +{ + src->File = dst->File; + src->Indirect = dst->Indirect; + src->Dimension = dst->Dimension; + src->Index = dst->Index; + src->Absolute = 0; + src->Negate = 0; + src->SwizzleX = TGSI_SWIZZLE_X; + src->SwizzleY = TGSI_SWIZZLE_Y; + src->SwizzleZ = TGSI_SWIZZLE_Z; + src->SwizzleW = TGSI_SWIZZLE_W; +} + +/* Get internal-temp src/dst to use for a sequence of instructions + * generated by a single TGSI op. + */ +static void +get_internal_temp(struct fd3_compile_context *ctx, + struct tgsi_dst_register *tmp_dst, + struct tgsi_src_register *tmp_src) +{ + int n; + + tmp_dst->File = TGSI_FILE_TEMPORARY; + tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW; + tmp_dst->Indirect = 0; + tmp_dst->Dimension = 0; + + /* assign next temporary: */ + n = ctx->num_internal_temps++; + + tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n; + + src_from_dst(tmp_src, tmp_dst); +} + +static void +get_immediate(struct fd3_compile_context *ctx, + struct tgsi_src_register *reg, uint32_t val) +{ + unsigned neg, swiz, idx, i; + /* actually maps 1:1 currently.. not sure if that is safe to rely on: */ + static const unsigned swiz2tgsi[] = { + TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, + }; + + for (i = 0; i < ctx->immediate_idx; i++) { + swiz = i % 4; + idx = i / 4; + + if (ctx->so->immediates[idx].val[swiz] == val) { + neg = 0; + break; + } + + if (ctx->so->immediates[idx].val[swiz] == -val) { + neg = 1; + break; + } + } + + if (i == ctx->immediate_idx) { + /* need to generate a new immediate: */ + swiz = i % 4; + idx = i / 4; + neg = 0; + ctx->so->immediates[idx].val[swiz] = val; + ctx->so->immediates_count = idx + 1; + ctx->immediate_idx++; + } + + reg->File = TGSI_FILE_IMMEDIATE; + reg->Indirect = 0; + reg->Dimension = 0; + reg->Index = idx; + reg->Absolute = 0; + reg->Negate = neg; + reg->SwizzleX = swiz2tgsi[swiz]; + reg->SwizzleY = swiz2tgsi[swiz]; + reg->SwizzleZ = swiz2tgsi[swiz]; + reg->SwizzleW = swiz2tgsi[swiz]; +} + +static type_t +get_type(struct fd3_compile_context *ctx) +{ + return ctx->so->half_precision ? TYPE_F16 : TYPE_F32; +} + +static unsigned +src_swiz(struct tgsi_src_register *src, int chan) +{ + switch (chan) { + case 0: return src->SwizzleX; + case 1: return src->SwizzleY; + case 2: return src->SwizzleZ; + case 3: return src->SwizzleW; + } + assert(0); + return 0; +} + +static void +create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst, + struct tgsi_src_register *src) +{ + type_t type_mov = get_type(ctx); + unsigned i; + + for (i = 0; i < 4; i++) { + /* move to destination: */ + if (dst->WriteMask & (1 << i)) { + struct ir3_instruction *instr = + ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = type_mov; + instr->cat1.dst_type = type_mov; + add_dst_reg(ctx, instr, dst, i); + add_src_reg(ctx, instr, src, src_swiz(src, i)); + } else { + ir3_instr_create(ctx->ir, 0, OPC_NOP); + } + } + +} + +static struct tgsi_dst_register * +get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + unsigned i; + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + struct tgsi_src_register *src = &inst->Src[i].Register; + if ((src->File == dst->File) && (src->Index == dst->Index)) { + get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src); + ctx->tmp_dst.WriteMask = dst->WriteMask; + dst = &ctx->tmp_dst; + break; + } + } + return dst; +} + +static void +put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst, + struct tgsi_dst_register *dst) +{ + /* if necessary, add mov back into original dst: */ + if (dst != &inst->Dst[0].Register) { + create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src); + } +} + +/* helper to generate the necessary repeat and/or additional instructions + * to turn a scalar instruction into a vector operation: + */ +static void +vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr, + struct tgsi_dst_register *dst, int nsrcs, ...) +{ + va_list ap; + int i, j, n = 0; + + add_dst_reg(ctx, instr, dst, 0); + + va_start(ap, nsrcs); + for (j = 0; j < nsrcs; j++) { + struct tgsi_src_register *src = + va_arg(ap, struct tgsi_src_register *); + unsigned flags = va_arg(ap, unsigned); + add_src_reg(ctx, instr, src, 0)->flags |= flags; + } + va_end(ap); + + for (i = 0; i < 4; i++) { + if (dst->WriteMask & (1 << i)) { + struct ir3_instruction *cur; + + if (n++ == 0) { + cur = instr; + } else { + cur = ir3_instr_clone(instr); + cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP); + } + + /* fix-up dst register component: */ + cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i); + + /* fix-up src register component: */ + va_start(ap, nsrcs); + for (j = 0; j < nsrcs; j++) { + struct tgsi_src_register *src = + va_arg(ap, struct tgsi_src_register *); + (void)va_arg(ap, unsigned); + cur->regs[j+1]->num = + regid(cur->regs[j+1]->num >> 2, + src_swiz(src, i)); + } + va_end(ap); + } + } + + /* pad w/ nop's.. at least until we are clever enough to + * figure out if we really need to.. + */ + for (; n < 4; n++) { + ir3_instr_create(instr->shader, 0, OPC_NOP); + } +} + +/* + * Handlers for TGSI instructions which do not have a 1:1 mapping to + * native instructions: + */ + +static void +trans_dotp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; + unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW }; + unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW }; + opc_t opc_mad = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32; + unsigned i; + + assert(inst->Instruction.NumSrcRegs == 2); + assert(inst->Instruction.NumDstRegs == 1); + + get_internal_temp(ctx, &tmp_dst, &tmp_src); + + /* Blob compiler never seems to use a const in src1 position for + * mad.*, although there does seem (according to disassembler + * hidden in libllvm-a3xx.so) to be a bit to indicate that src1 + * is a const. Not sure if this is a hw bug, or simply that the + * disassembler lies. + */ + if ((src1->File == TGSI_FILE_IMMEDIATE) || + (src1->File == TGSI_FILE_CONSTANT)) { + + /* the mov to tmp unswizzles src1, so now we have tmp.xyzw: + */ + for (i = 0; i < 4; i++) + swiz1[i] = i; + + /* the first mul.f will clobber tmp.x, but that is ok + * because after that point we no longer need tmp.x: + */ + create_mov(ctx, &tmp_dst, src1); + src1 = &tmp_src; + } + + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, swiz0[0]); + add_src_reg(ctx, instr, src1, swiz1[0]); + + for (i = 1; i < t->arg; i++) { + ir3_instr_create(ctx->ir, 0, OPC_NOP); + + instr = ir3_instr_create(ctx->ir, 3, opc_mad); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, swiz0[i]); + add_src_reg(ctx, instr, src1, swiz1[i]); + add_src_reg(ctx, instr, &tmp_src, 0); + } + + ir3_instr_create(ctx->ir, 0, OPC_NOP); + + /* pad out to multiple of 4 scalar instructions: */ + for (i = 2 * t->arg; i % 4; i++) { + ir3_instr_create(ctx->ir, 0, OPC_NOP); + } + + create_mov(ctx, dst, &tmp_src); +} + +/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */ +static void +trans_lrp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst1, tmp_dst2; + struct tgsi_src_register tmp_src1, tmp_src2; + struct tgsi_src_register tmp_const; + + get_internal_temp(ctx, &tmp_dst1, &tmp_src1); + get_internal_temp(ctx, &tmp_dst2, &tmp_src2); + + get_immediate(ctx, &tmp_const, fui(1.0)); + + /* tmp1 = (a * b) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + vectorize(ctx, instr, &tmp_dst1, 2, + &inst->Src[0].Register, 0, + &inst->Src[1].Register, 0); + + /* tmp2 = (1 - a) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &tmp_dst2, 2, + &tmp_const, 0, + &inst->Src[0].Register, IR3_REG_NEGATE); + + /* tmp2 = tmp2 * c */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + vectorize(ctx, instr, &tmp_dst2, 2, + &tmp_src2, 0, + &inst->Src[2].Register, 0); + + /* dst = tmp1 + tmp2 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &inst->Dst[0].Register, 2, + &tmp_src1, 0, + &tmp_src2, 0); +} + +/* FRC(x) = x - FLOOR(x) */ +static void +trans_frac(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + + get_internal_temp(ctx, &tmp_dst, &tmp_src); + + /* tmp = FLOOR(x) */ + instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F); + vectorize(ctx, instr, &tmp_dst, 1, + &inst->Src[0].Register, 0); + + /* dst = x - tmp */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + vectorize(ctx, instr, &inst->Dst[0].Register, 2, + &inst->Src[0].Register, 0, + &tmp_src, IR3_REG_NEGATE); +} + +/* POW(a,b) = EXP2(b * LOG2(a)) */ +static void +trans_pow(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct ir3_register *r; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_dst_register *dst = &inst->Dst[0].Register; + struct tgsi_src_register *src0 = &inst->Src[0].Register; + struct tgsi_src_register *src1 = &inst->Src[1].Register; + + assert(inst->Instruction.NumSrcRegs == 2); + assert(inst->Instruction.NumDstRegs == 1); + + get_internal_temp(ctx, &tmp_dst, &tmp_src); + + /* log2 Rtmp, Rsrc0 */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, src0, src0->SwizzleX); + regmask_set(ctx->needs_ss, r); + + /* mul.f Rtmp, Rtmp, Rsrc1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F); + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, &tmp_src, 0); + add_src_reg(ctx, instr, src1, src1->SwizzleX); + + /* blob compiler seems to ensure there are at least 6 instructions + * between a "simple" (non-cat4) instruction and a dependent cat4.. + * probably we need to handle this in some other places too. + */ + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + + /* exp2 Rdst, Rtmp */ + instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2); + r = add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, &tmp_src, 0); + regmask_set(ctx->needs_ss, r); + + create_mov(ctx, dst, &tmp_src); +} + +/* texture fetch/sample instructions: */ +static void +trans_samp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_register *r; + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_src_register *coord = &inst->Src[0].Register; + struct tgsi_src_register *samp = &inst->Src[1].Register; + unsigned tex = inst->Texture.Texture; + int8_t *order; + unsigned i, j, flags = 0; + + switch (t->arg) { + case TGSI_OPCODE_TEX: + order = (tex == TGSI_TEXTURE_2D) ? + (int8_t[4]){ 0, 1, -1, -1 } : /* 2D */ + (int8_t[4]){ 0, 1, 2, -1 }; /* 3D */ + break; + case TGSI_OPCODE_TXP: + order = (tex == TGSI_TEXTURE_2D) ? + (int8_t[4]){ 0, 1, 3, -1 } : /* 2D */ + (int8_t[4]){ 0, 1, 2, 3 }; /* 3D */ + flags |= IR3_INSTR_P; + break; + default: + assert(0); + break; + } + + if (tex == TGSI_TEXTURE_3D) + flags |= IR3_INSTR_3D; + + assert(inst->Instruction.NumSrcRegs == 2); + assert(inst->Instruction.NumDstRegs == 1); + + /* The texture sample instructions need to coord in successive + * registers/components (ie. src.xy but not src.yx). And TXP + * needs the .w component in .z for 2D.. so in some cases we + * might need to emit some mov instructions to shuffle things + * around: + */ + for (i = 1; (i < 4) && (order[i] >= 0); i++) { + if (src_swiz(coord, 0) != (src_swiz(coord, i) + order[i])) { + type_t type_mov = get_type(ctx); + + /* need to move things around: */ + get_internal_temp(ctx, &tmp_dst, &tmp_src); + + for (j = 0; (j < 4) && (order[j] >= 0); j++) { + instr = ir3_instr_create(ctx->ir, 1, 0); + instr->cat1.src_type = type_mov; + instr->cat1.dst_type = type_mov; + add_dst_reg(ctx, instr, &tmp_dst, j); + add_src_reg(ctx, instr, coord, + src_swiz(coord, order[j])); + } + + coord = &tmp_src; + + if (j < 4) + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1; + + break; + } + } + + instr = ir3_instr_create(ctx->ir, 5, t->opc); + instr->cat5.type = get_type(ctx); + instr->cat5.samp = samp->Index; + instr->cat5.tex = samp->Index; + instr->flags |= flags; + + r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0); + r->wrmask = inst->Dst[0].Register.WriteMask; + + add_src_reg(ctx, instr, coord, coord->SwizzleX); + + regmask_set(ctx->needs_sy, r); +} + +/* CMP(a,b,c) = (a < 0) ? b : c */ +static void +trans_cmp(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct tgsi_src_register constval; + /* final instruction uses original src1 and src2, so we need get_dst() */ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + + get_internal_temp(ctx, &tmp_dst, &tmp_src); + + /* cmps.f.ge tmp, src0, 0.0 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + instr->cat2.condition = IR3_COND_GE; + get_immediate(ctx, &constval, fui(0.0)); + vectorize(ctx, instr, &tmp_dst, 2, + &inst->Src[0].Register, 0, + &constval, 0); + + /* add.s tmp, tmp, -1 */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S); + instr->repeat = 3; + add_dst_reg(ctx, instr, &tmp_dst, 0); + add_src_reg(ctx, instr, &tmp_src, 0); + ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1; + + /* sel.{f32,f16} dst, src2, tmp, src1 */ + instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ? + OPC_SEL_F16 : OPC_SEL_F32); + vectorize(ctx, instr, &inst->Dst[0].Register, 3, + &inst->Src[2].Register, 0, + &tmp_src, 0, + &inst->Src[1].Register, 0); + + put_dst(ctx, inst, dst); +} + +/* + * Conditional / Flow control + */ + +static unsigned +find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr) +{ + unsigned i; + for (i = 0; i < ctx->ir->instrs_count; i++) + if (ctx->ir->instrs[i] == instr) + return i; + return ~0; +} + +static void +push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr) +{ + ctx->branch[ctx->branch_count++] = instr; +} + +static void +pop_branch(struct fd3_compile_context *ctx) +{ + struct ir3_instruction *instr; + + /* if we were clever enough, we'd patch this up after the fact, + * and set (jp) flag on whatever the next instruction was, rather + * than inserting an extra nop.. + */ + instr = ir3_instr_create(ctx->ir, 0, OPC_NOP); + instr->flags |= IR3_INSTR_JP; + + /* pop the branch instruction from the stack and fix up branch target: */ + instr = ctx->branch[--ctx->branch_count]; + instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1; +} + +/* We probably don't really want to translate if/else/endif into branches.. + * the blob driver evaluates both legs of the if and then uses the sel + * instruction to pick which sides of the branch to "keep".. but figuring + * that out will take somewhat more compiler smarts. So hopefully branches + * don't kill performance too badly. + */ +static void +trans_if(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + struct tgsi_src_register *src = &inst->Src[0].Register; + struct tgsi_src_register constval; + + get_immediate(ctx, &constval, fui(0.0)); + + instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F); + ir3_reg_create(instr, regid(REG_P0, 0), 0); + add_src_reg(ctx, instr, &constval, constval.SwizzleX); + add_src_reg(ctx, instr, src, src->SwizzleX); + instr->cat2.condition = IR3_COND_EQ; + + instr = ir3_instr_create(ctx->ir, 0, OPC_BR); + push_branch(ctx, instr); +} + +static void +trans_else(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct ir3_instruction *instr; + + /* for first half of if/else/endif, generate a jump past the else: */ + instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP); + + pop_branch(ctx); + push_branch(ctx, instr); +} + +static void +trans_endif(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + pop_branch(ctx); +} + +/* + * Handlers for TGSI instructions which do have 1:1 mapping to native + * instructions: + */ + +static void +instr_cat0(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + assert(inst->Instruction.NumSrcRegs == 0); + assert(inst->Instruction.NumDstRegs == 0); + + ir3_instr_create(ctx->ir, 0, t->opc); +} + +static void +instr_cat1(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src = &inst->Src[0].Register; + + assert(inst->Instruction.NumSrcRegs == 1); + assert(inst->Instruction.NumDstRegs == 1); + + /* mov instructions can't handle a negate on src: */ + if (src->Negate) { + struct tgsi_src_register constval; + struct ir3_instruction *instr; + + /* since right now, we are using uniformly either TYPE_F16 or + * TYPE_F32, and we don't utilize the conversion possibilities + * of mov instructions, we can get away with substituting an + * add.f which can handle negate. Might need to revisit this + * in the future if we start supporting widening/narrowing or + * conversion to/from integer.. + */ + instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F); + get_immediate(ctx, &constval, fui(0.0)); + vectorize(ctx, instr, dst, 2, src, 0, &constval, 0); + } else { + create_mov(ctx, dst, src); + /* create_mov() generates vector sequence, so no vectorize() */ + } + put_dst(ctx, inst, dst); +} + +static void +instr_cat2(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct ir3_instruction *instr; + + assert(inst->Instruction.NumSrcRegs == 2); + assert(inst->Instruction.NumDstRegs == 1); + + instr = ir3_instr_create(ctx->ir, 2, t->opc); + instr->cat2.condition = t->arg; + + switch (t->opc) { + case OPC_ABSNEG_F: + case OPC_ABSNEG_S: + case OPC_CLZ_B: + case OPC_CLZ_S: + case OPC_SIGN_F: + case OPC_FLOOR_F: + case OPC_CEIL_F: + case OPC_RNDNE_F: + case OPC_RNDAZ_F: + case OPC_TRUNC_F: + case OPC_NOT_B: + case OPC_BFREV_B: + case OPC_SETRM: + case OPC_CBITS_B: + /* these only have one src reg */ + vectorize(ctx, instr, dst, 1, + &inst->Src[0].Register, 0); + break; + default: + vectorize(ctx, instr, dst, 2, + &inst->Src[0].Register, 0, + &inst->Src[1].Register, 0); + break; + } + + put_dst(ctx, inst, dst); +} + +static void +instr_cat3(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct tgsi_src_register *src1 = &inst->Src[1].Register; + struct tgsi_dst_register tmp_dst; + struct tgsi_src_register tmp_src; + struct ir3_instruction *instr; + + assert(inst->Instruction.NumSrcRegs == 3); + assert(inst->Instruction.NumDstRegs == 1); + + /* Blob compiler never seems to use a const in src1 position.. + * although there does seem (according to disassembler hidden + * in libllvm-a3xx.so) to be a bit to indicate that src1 is a + * const. Not sure if this is a hw bug, or simply that the + * disassembler lies. + */ + if ((src1->File == TGSI_FILE_CONSTANT) || + (src1->File == TGSI_FILE_IMMEDIATE)) { + get_internal_temp(ctx, &tmp_dst, &tmp_src); + create_mov(ctx, &tmp_dst, src1); + src1 = &tmp_src; + } + + instr = ir3_instr_create(ctx->ir, 3, + ctx->so->half_precision ? t->hopc : t->opc); + vectorize(ctx, instr, dst, 3, + &inst->Src[0].Register, 0, + src1, 0, + &inst->Src[2].Register, 0); + put_dst(ctx, inst, dst); +} + +static void +instr_cat4(const struct instr_translater *t, + struct fd3_compile_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct tgsi_dst_register *dst = get_dst(ctx, inst); + struct ir3_instruction *instr; + + assert(inst->Instruction.NumSrcRegs == 1); + assert(inst->Instruction.NumDstRegs == 1); + + ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5; + instr = ir3_instr_create(ctx->ir, 4, t->opc); + + vectorize(ctx, instr, dst, 1, + &inst->Src[0].Register, 0); + + regmask_set(ctx->needs_ss, instr->regs[0]); + + put_dst(ctx, inst, dst); +} + +static const struct instr_translater translaters[TGSI_OPCODE_LAST] = { +#define INSTR(n, f, ...) \ + [TGSI_OPCODE_ ## n] = { .fxn = (f), ##__VA_ARGS__ } + + INSTR(MOV, instr_cat1), + INSTR(RCP, instr_cat4, .opc = OPC_RCP), + INSTR(RSQ, instr_cat4, .opc = OPC_RSQ), + INSTR(SQRT, instr_cat4, .opc = OPC_SQRT), + INSTR(MUL, instr_cat2, .opc = OPC_MUL_F), + INSTR(ADD, instr_cat2, .opc = OPC_ADD_F), + INSTR(DP2, trans_dotp, .arg = 2), + INSTR(DP3, trans_dotp, .arg = 3), + INSTR(DP4, trans_dotp, .arg = 4), + INSTR(MIN, instr_cat2, .opc = OPC_MIN_F), + INSTR(MAX, instr_cat2, .opc = OPC_MAX_F), + INSTR(SLT, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT), + INSTR(SGE, instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE), + INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16), + INSTR(LRP, trans_lrp), + INSTR(FRC, trans_frac), + INSTR(FLR, instr_cat2, .opc = OPC_FLOOR_F), + INSTR(EX2, instr_cat4, .opc = OPC_EXP2), + INSTR(LG2, instr_cat4, .opc = OPC_LOG2), + INSTR(POW, trans_pow), + INSTR(COS, instr_cat4, .opc = OPC_SIN), + INSTR(SIN, instr_cat4, .opc = OPC_COS), + INSTR(TEX, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX), + INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP), + INSTR(CMP, trans_cmp), + INSTR(IF, trans_if), + INSTR(ELSE, trans_else), + INSTR(ENDIF, trans_endif), + INSTR(END, instr_cat0, .opc = OPC_END), +}; + +static void +decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +{ + struct fd3_shader_stateobj *so = ctx->so; + unsigned base = ctx->base_reg[TGSI_FILE_INPUT]; + unsigned i, flags = 0; + + if (ctx->so->half_precision) + flags |= IR3_REG_HALF; + + for (i = decl->Range.First; i <= decl->Range.Last; i++) { + unsigned n = so->inputs_count++; + unsigned r = regid(i + base, 0); + unsigned ncomp; + + /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */ + ncomp = 4; + + DBG("decl in -> r%d", i + base); // XXX + + so->inputs[n].compmask = (1 << ncomp) - 1; + so->inputs[n].regid = r; + so->inputs[n].inloc = ctx->next_inloc; + ctx->next_inloc += ncomp; + + so->total_in += ncomp; + + /* for frag shaders, we need to generate the corresponding bary instr: */ + if (ctx->type == TGSI_PROCESSOR_FRAGMENT) { + struct ir3_instruction *instr; + + instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F); + instr->repeat = ncomp - 1; + + /* dst register: */ + ctx->last_input = ir3_reg_create(instr, r, flags); + + /* input position: */ + ir3_reg_create(instr, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val = + so->inputs[n].inloc - 8; + + /* input base (always r0.x): */ + ir3_reg_create(instr, regid(0,0), 0); + } + } +} + +static void +decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +{ + struct fd3_shader_stateobj *so = ctx->so; + unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT]; + unsigned name = decl->Semantic.Name; + unsigned i; + + assert(decl->Declaration.Semantic); // TODO is this ever not true? + + DBG("decl out[%d] -> r%d", name, decl->Range.First + base); // XXX + + if (ctx->type == TGSI_PROCESSOR_VERTEX) { + switch (name) { + case TGSI_SEMANTIC_POSITION: + so->pos_regid = regid(decl->Range.First + base, 0); + break; + case TGSI_SEMANTIC_PSIZE: + so->psize_regid = regid(decl->Range.First + base, 0); + break; + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_GENERIC: + for (i = decl->Range.First; i <= decl->Range.Last; i++) + so->outputs[so->outputs_count++].regid = regid(i + base, 0); + break; + default: + DBG("unknown VS semantic name: %s", + tgsi_semantic_names[name]); + assert(0); + } + } else { + switch (name) { + case TGSI_SEMANTIC_COLOR: + so->color_regid = regid(decl->Range.First + base, 0); + break; + default: + DBG("unknown VS semantic name: %s", + tgsi_semantic_names[name]); + assert(0); + } + } +} + +static void +decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) +{ + ctx->so->samplers_count++; +} + +static void +compile_instructions(struct fd3_compile_context *ctx) +{ + struct ir3_shader *ir = ctx->ir; + + while (!tgsi_parse_end_of_tokens(&ctx->parser)) { + tgsi_parse_token(&ctx->parser); + + switch (ctx->parser.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: { + struct tgsi_full_declaration *decl = + &ctx->parser.FullToken.FullDeclaration; + if (decl->Declaration.File == TGSI_FILE_OUTPUT) { + decl_out(ctx, decl); + } else if (decl->Declaration.File == TGSI_FILE_INPUT) { + decl_in(ctx, decl); + } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + decl_samp(ctx, decl); + } + break; + } + case TGSI_TOKEN_TYPE_IMMEDIATE: { + /* TODO: if we know the immediate is small enough, and only + * used with instructions that can embed an immediate, we + * can skip this: + */ + struct tgsi_full_immediate *imm = + &ctx->parser.FullToken.FullImmediate; + unsigned n = ctx->so->immediates_count++; + memcpy(ctx->so->immediates[n].val, imm->u, 16); + break; + } + case TGSI_TOKEN_TYPE_INSTRUCTION: { + struct tgsi_full_instruction *inst = + &ctx->parser.FullToken.FullInstruction; + unsigned opc = inst->Instruction.Opcode; + const struct instr_translater *t = &translaters[opc]; + + if (t->fxn) { + t->fxn(t, ctx, inst); + ctx->num_internal_temps = 0; + } else { + debug_printf("unknown TGSI opc: %s\n", + tgsi_get_opcode_name(opc)); + tgsi_dump(ctx->tokens, 0); + assert(0); + } + + break; + } + default: + break; + } + } + + if (ir->instrs_count > 0) + ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY; + + if (ctx->last_input) + ctx->last_input->flags |= IR3_REG_EI; +} + +int +fd3_compile_shader(struct fd3_shader_stateobj *so, + const struct tgsi_token *tokens) +{ + struct fd3_compile_context ctx; + + assert(!so->ir); + + so->ir = ir3_shader_create(); + + so->color_regid = regid(63,0); + so->pos_regid = regid(63,0); + so->psize_regid = regid(63,0); + + if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK) + return -1; + + compile_instructions(&ctx); + + compile_free(&ctx); + + return 0; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h new file mode 100644 index 00000000000..1116f598a58 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h @@ -0,0 +1,38 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_COMPILER_H_ +#define FD3_COMPILER_H_ + +#include "fd3_program.h" +#include "fd3_util.h" + +int fd3_compile_shader(struct fd3_shader_stateobj *so, + const struct tgsi_token *tokens); + +#endif /* FD3_COMPILER_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c new file mode 100644 index 00000000000..3ae9b2953e4 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.c @@ -0,0 +1,118 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "fd3_context.h" +#include "fd3_blend.h" +#include "fd3_draw.h" +#include "fd3_emit.h" +#include "fd3_gmem.h" +#include "fd3_program.h" +#include "fd3_rasterizer.h" +#include "fd3_texture.h" +#include "fd3_zsa.h" + +static void +fd3_context_destroy(struct pipe_context *pctx) +{ + fd3_prog_fini(pctx); + fd_context_destroy(pctx); +} + +/* TODO we could combine a few of these small buffers (solid_vbuf, + * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and + * save a tiny bit of memory + */ + +static struct pipe_resource * +create_solid_vertexbuf(struct pipe_context *pctx) +{ + static const float init_shader_const[] = { + -1.000000, +1.000000, +1.000000, + +1.000000, -1.000000, +1.000000, + }; + struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, + PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const)); + pipe_buffer_write(pctx, prsc, 0, + sizeof(init_shader_const), init_shader_const); + return prsc; +} + +static struct pipe_resource * +create_blit_texcoord_vertexbuf(struct pipe_context *pctx) +{ + struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, + PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16); + return prsc; +} + +struct pipe_context * +fd3_context_create(struct pipe_screen *pscreen, void *priv) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context); + struct pipe_context *pctx; + + if (!fd3_ctx) + return NULL; + + pctx = &fd3_ctx->base.base; + + fd3_ctx->base.screen = fd_screen(pscreen); + + pctx->destroy = fd3_context_destroy; + pctx->create_blend_state = fd3_blend_state_create; + pctx->create_rasterizer_state = fd3_rasterizer_state_create; + pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create; + + fd3_draw_init(pctx); + fd3_gmem_init(pctx); + fd3_texture_init(pctx); + fd3_prog_init(pctx); + + pctx = fd_context_init(&fd3_ctx->base, pscreen, priv); + if (!pctx) + return NULL; + + fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd3_ctx->vsc_pipe_mem = fd_bo_new(screen->dev, 0x40000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx); + fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx); + + return pctx; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h new file mode 100644 index 00000000000..3829ab52675 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h @@ -0,0 +1,68 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_CONTEXT_H_ +#define FD3_CONTEXT_H_ + +#include "freedreno_drmif.h" + +#include "freedreno_context.h" + +struct fd3_context { + struct fd_context base; + + struct fd_bo *vs_pvt_mem, *fs_pvt_mem; + + /* not sure how big this actually needs to be.. the blob driver + * combines it w/ the solid_vertexbuf, we could probably do the + * same to save an extra bo allocation.. + */ + struct fd_bo *vsc_size_mem; + + struct fd_bo *vsc_pipe_mem; + + /* vertex buf used for clear/gmem->mem vertices, and mem->gmem + * vertices: + */ + struct pipe_resource *solid_vbuf; + + /* vertex buf used for mem->gmem tex coords: + */ + struct pipe_resource *blit_texcoord_vbuf; +}; + +static INLINE struct fd3_context * +fd3_context(struct fd_context *ctx) +{ + return (struct fd3_context *)ctx; +} + +struct pipe_context * +fd3_context_create(struct pipe_screen *pscreen, void *priv); + +#endif /* FD3_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c new file mode 100644 index 00000000000..953d45e1738 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -0,0 +1,236 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd3_draw.h" +#include "fd3_context.h" +#include "fd3_emit.h" +#include "fd3_program.h" +#include "fd3_util.h" +#include "fd3_zsa.h" + + +static void +emit_vertexbufs(struct fd_context *ctx) +{ + struct fd_vertex_stateobj *vtx = ctx->vtx; + struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf; + struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS]; + unsigned i; + + if (!vtx->num_elements) + return; + + for (i = 0; i < vtx->num_elements; i++) { + struct pipe_vertex_element *elem = &vtx->pipe[i]; + struct pipe_vertex_buffer *vb = + &vertexbuf->vb[elem->vertex_buffer_index]; + bufs[i].offset = vb->buffer_offset + elem->src_offset; + bufs[i].stride = vb->stride; + bufs[i].prsc = vb->buffer; + bufs[i].format = elem->src_format; + } + + fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements); +} + +static void +fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info) +{ + struct fd_ringbuffer *ring = ctx->ring; + unsigned dirty = ctx->dirty; + + fd3_emit_state(ctx, dirty); + + if (dirty & FD_DIRTY_VTXBUF) + emit_vertexbufs(ctx); + + OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); + OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x0000000); + + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, info->min_index); /* VFD_INDEX_MIN */ + OUT_RING(ring, info->max_index + 1); /* VFD_INDEX_MAX */ + OUT_RING(ring, info->start_instance); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, info->start); /* VFD_INDEX_OFFSET */ + + OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ + info->restart_index : 0xffffffff); + + fd_draw_emit(ctx, info); +} + +static void +fd3_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_ringbuffer *ring = ctx->ring; + unsigned ce, i; + + /* emit generic state now: */ + fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT | + FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR)); + + OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1); + OUT_RING(ring, 0X3c0000ff); + + fd3_emit_rbrc_draw_state(ring, + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + + if (buffers & PIPE_CLEAR_DEPTH) { + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | + A3XX_RB_DEPTH_CONTROL_Z_ENABLE | + A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth)); + } else { + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); + } + + if (buffers & PIPE_CLEAR_STENCIL) { + OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) | + A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) | + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) | + A3XX_RB_STENCILREFMASK_STENCILMASK(0) | + 0xff000000 | // XXX ??? + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | + A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + } else { + OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) | + A3XX_RB_STENCILREFMASK_STENCILMASK(0) | + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0)); + OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) | + A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) | + A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + } + + if (buffers & PIPE_CLEAR_COLOR) { + ce = 0xf; + } else { + ce = 0x0; + } + + for (i = 0; i < 4; i++) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(12) | + A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) | + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); + + OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | + A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | + A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) | + A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE); + } + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); + + fd3_program_emit(ring, &ctx->solid_prog); + + fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { + { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, + }, 1); + + fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, 0); /* VFD_INDEX_MIN */ + OUT_RING(ring, 2); /* VFD_INDEX_MAX */ + OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ + + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, PERFCOUNTER_STOP); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 2); /* NumIndices */ + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); +} + +void +fd3_draw_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->draw = fd3_draw; + ctx->clear = fd3_clear; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.h b/src/gallium/drivers/freedreno/a3xx/fd3_draw.h new file mode 100644 index 00000000000..09b1243dbb3 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.h @@ -0,0 +1,38 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_DRAW_H_ +#define FD3_DRAW_H_ + +#include "pipe/p_context.h" + +#include "freedreno_draw.h" + +void fd3_draw_init(struct pipe_context *pctx); + +#endif /* FD3_DRAW_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c new file mode 100644 index 00000000000..1d048b08c83 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -0,0 +1,581 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_helpers.h" +#include "util/u_format.h" + +#include "freedreno_resource.h" + +#include "fd3_emit.h" +#include "fd3_blend.h" +#include "fd3_context.h" +#include "fd3_program.h" +#include "fd3_rasterizer.h" +#include "fd3_texture.h" +#include "fd3_util.h" +#include "fd3_zsa.h" + +/* regid: base const register + * prsc or dwords: buffer containing constant values + * sizedwords: size of const value buffer + */ +void +fd3_emit_constant(struct fd_ringbuffer *ring, + enum adreno_state_block sb, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc) +{ + uint32_t i, sz; + enum adreno_state_src src; + + if (prsc) { + sz = 0; + src = SS_INDIRECT; + } else { + sz = sizedwords; + src = SS_DIRECT; + } + + /* we have this sometimes, not others.. perhaps we could be clever + * and figure out actually when we need to invalidate cache: + */ + OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2); + OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0)); + OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) | + A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) | + A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE); + + OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | + CP_LOAD_STATE_0_STATE_SRC(src) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); + if (prsc) { + struct fd_bo *bo = fd_resource(prsc)->bo; + OUT_RELOC(ring, bo, offset, + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + } else { + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS)); + dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; + } + for (i = 0; i < sz; i++) { + OUT_RING(ring, dwords[i]); + } +} + +static void +emit_constants(struct fd_ringbuffer *ring, + enum adreno_state_block sb, + struct fd_constbuf_stateobj *constbuf, + struct fd3_shader_stateobj *shader) +{ + uint32_t enabled_mask = constbuf->enabled_mask; + uint32_t base = 0; + unsigned i; + + // XXX TODO only emit dirty consts.. but we need to keep track if + // they are clobbered by a clear, gmem2mem, or mem2gmem.. + constbuf->dirty_mask = enabled_mask; + + /* emit user constants: */ + while (enabled_mask) { + unsigned index = ffs(enabled_mask) - 1; + struct pipe_constant_buffer *cb = &constbuf->cb[index]; + unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */ + + // I expect that size should be a multiple of vec4's: + assert(size == align(size, 4)); + + /* gallium could have const-buffer still bound, even though the + * shader is not using it. Writing consts above constlen (or + * rather, HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will cause a + * hang. + */ + if ((base / 4) >= shader->constlen) + break; + + if (constbuf->dirty_mask & (1 << index)) { + fd3_emit_constant(ring, sb, base, + cb->buffer_offset, size, + cb->user_buffer, cb->buffer); + constbuf->dirty_mask &= ~(1 << index); + } + + base += size; + enabled_mask &= ~(1 << index); + } + + /* emit shader immediates: */ + if (shader) { + for (i = 0; i < shader->immediates_count; i++) { + fd3_emit_constant(ring, sb, + 4 * (shader->first_immediate + i), + 0, 4, shader->immediates[i].val, NULL); + } + } +} + +#define VERT_TEX_OFF 0 +#define FRAG_TEX_OFF 16 +#define BASETABLE_SZ 14 + +static void +emit_textures(struct fd_ringbuffer *ring, + enum adreno_state_block sb, + struct fd_texture_stateobj *tex) +{ + static const unsigned tex_off[] = { + [SB_VERT_TEX] = VERT_TEX_OFF, + [SB_FRAG_TEX] = FRAG_TEX_OFF, + }; + static const enum adreno_state_block mipaddr[] = { + [SB_VERT_TEX] = SB_VERT_MIPADDR, + [SB_FRAG_TEX] = SB_FRAG_MIPADDR, + }; + unsigned i, j; + + assert(tex->num_samplers == tex->num_textures); // TODO check.. + + if (!tex->num_samplers) + return; + + /* output sampler state: */ + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers)); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + for (i = 0; i < tex->num_samplers; i++) { + struct fd3_sampler_stateobj *sampler = + fd3_sampler_stateobj(tex->samplers[i]); + OUT_RING(ring, sampler->texsamp0); + OUT_RING(ring, sampler->texsamp1); + } + + /* emit texture state: */ + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures)); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + for (i = 0; i < tex->num_textures; i++) { + struct fd3_pipe_sampler_view *view = + fd3_pipe_sampler_view(tex->textures[i]); + OUT_RING(ring, view->texconst0); + OUT_RING(ring, view->texconst1); + OUT_RING(ring, view->texconst2 | + A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i)); + OUT_RING(ring, view->texconst3); + } + + /* emit mipaddrs: */ + OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures)); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) | + CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + for (i = 0; i < tex->num_textures; i++) { + struct fd3_pipe_sampler_view *view = + fd3_pipe_sampler_view(tex->textures[i]); + OUT_RELOC(ring, view->tex_resource->bo, 0, 0); + /* I think each entry is a ptr to mipmap level.. for now, just + * pad w/ null's until I get around to actually implementing + * mipmap support.. + */ + for (j = 1; j < BASETABLE_SZ; j++) { + OUT_RING(ring, 0x00000000); + } + } +} + +static void +emit_cache_flush(struct fd_ringbuffer *ring) +{ + OUT_PKT3(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, CACHE_FLUSH); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_POINTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 0); /* NumIndices */ + + OUT_PKT3(ring, CP_NOP, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); +} + +/* emit texture state for mem->gmem restore operation.. eventually it would + * be good to get rid of this and use normal CSO/etc state for more of these + * special cases, but for now the compiler is not sufficient.. + */ +void +fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf) +{ + struct fd_resource *rsc = fd_resource(psurf->texture); + + /* output sampler state: */ + OUT_PKT3(ring, CP_LOAD_STATE, 4); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | + CP_LOAD_STATE_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) | + A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) | + A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) | + A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) | + A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT)); + OUT_RING(ring, 0x00000000); + + /* emit texture state: */ + OUT_PKT3(ring, CP_LOAD_STATE, 6); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) | + CP_LOAD_STATE_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) | + 0x40000000 | // XXX + fd3_tex_swiz(psurf->format, PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN, + PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA)); + OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) | + A3XX_TEX_CONST_1_WIDTH(psurf->width) | + A3XX_TEX_CONST_1_HEIGHT(psurf->height)); + OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) | + A3XX_TEX_CONST_2_INDX(0)); + OUT_RING(ring, 0x00000000); + + /* emit mipaddrs: */ + OUT_PKT3(ring, CP_LOAD_STATE, 3); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) | + CP_LOAD_STATE_0_NUM_UNIT(1)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_RELOC(ring, rsc->bo, 0, 0); +} + +void +fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, + struct fd_program_stateobj *prog, + struct fd3_vertex_buf *vbufs, uint32_t n) +{ + struct fd3_shader_stateobj *vp = prog->vp; + uint32_t i; + + n = MIN2(n, vp->inputs_count); + + for (i = 0; i < n; i++) { + struct pipe_resource *prsc = vbufs[i].prsc; + struct fd_resource *rsc = fd_resource(prsc); + enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vbufs[i].format); + bool switchnext = (i != (n - 1)); + uint32_t fs = util_format_get_blocksize(vbufs[i].format); + + OUT_PKT0(ring, REG_A3XX_VFD_FETCH(i), 2); + OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) | + A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) | + COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) | + A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) | + A3XX_VFD_FETCH_INSTR_0_STEPRATE(1)); + OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0); + + OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1); + OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL | + A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) | + A3XX_VFD_DECODE_INSTR_FORMAT(fmt) | + A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) | + A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) | + A3XX_VFD_DECODE_INSTR_LASTCOMPVALID | + COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT)); + } +} + +void +fd3_emit_state(struct fd_context *ctx, uint32_t dirty) +{ + struct fd_ringbuffer *ring = ctx->ring; + + if (dirty & FD_DIRTY_SAMPLE_MASK) { + OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | + A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | + A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask)); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { + struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa); + struct pipe_stencil_ref *sr = &ctx->stencil_ref; + + fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, zsa->rb_depth_control); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, zsa->rb_stencil_control); + + OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, zsa->rb_stencilrefmask | + A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); + OUT_RING(ring, zsa->rb_stencilrefmask_bf | + A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); + } + + if (dirty & FD_DIRTY_RASTERIZER) { + struct fd3_rasterizer_stateobj *rasterizer = + fd3_rasterizer_stateobj(ctx->rasterizer); + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); + OUT_RING(ring, rasterizer->gras_su_mode_control); + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, rasterizer->gras_su_point_minmax); + OUT_RING(ring, rasterizer->gras_su_point_size); + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2); + OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); + OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, rasterizer->gras_cl_clip_cntl); + } + + if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { + struct fd3_rasterizer_stateobj *rasterizer = + fd3_rasterizer_stateobj(ctx->rasterizer); + struct fd3_shader_stateobj *fp = ctx->prog.fp; + uint32_t stride_in_vpc; + + stride_in_vpc = align(fp->total_in, 4) / 4; + if (stride_in_vpc > 0) + stride_in_vpc = MAX2(stride_in_vpc, 2); + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, rasterizer->pc_prim_vtx_cntl | + A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc)); + } + + if (dirty & FD_DIRTY_SCISSOR) { + OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(ctx->scissor.minx) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(ctx->scissor.miny)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(ctx->scissor.maxx - 1) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(ctx->scissor.maxy - 1)); + + ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, ctx->scissor.minx); + ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, ctx->scissor.miny); + ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, ctx->scissor.maxx); + ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, ctx->scissor.maxy); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0])); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1])); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2])); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); + } + + if (dirty & FD_DIRTY_PROG) + fd3_program_emit(ring, &ctx->prog); + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) { + struct fd_program_stateobj *prog = &ctx->prog; + + emit_constants(ring, SB_VERT_SHADER, + &ctx->constbuf[PIPE_SHADER_VERTEX], + (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL); + emit_constants(ring, SB_FRAG_SHADER, + &ctx->constbuf[PIPE_SHADER_FRAGMENT], + (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL); + } + + if (dirty & FD_DIRTY_BLEND) { + struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend); + uint32_t i; + + for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, blend->rb_mrt[i].control); + + OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, blend->rb_mrt[i].blend_control); + } + } + + if (dirty & FD_DIRTY_VERTTEX) + emit_textures(ring, SB_VERT_TEX, &ctx->verttex); + + if (dirty & FD_DIRTY_FRAGTEX) + emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex); + + ctx->dirty &= ~dirty; +} + +/* emit setup at begin of new cmdstream buffer (don't rely on previous + * state, there could have been a context switch between ioctls): + */ +void +fd3_emit_restore(struct fd_context *ctx) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_ringbuffer *ring = ctx->ring; + int i; + + OUT_PKT3(ring, CP_REG_RMW, 3); + OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL); + OUT_RING(ring, 0xfffcffff); + OUT_RING(ring, 0x00000000); + + OUT_PKT3(ring, CP_INVALIDATE_STATE, 1); + OUT_RING(ring, 0x00007fff); + + OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3); + OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_CTRL_REG */ + OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */ + OUT_RING(ring, 0x00000000); /* SP_VS_PVT_MEM_SIZE_REG */ + + OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3); + OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_CTRL_REG */ + OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */ + OUT_RING(ring, 0x00000000); /* SP_FS_PVT_MEM_SIZE_REG */ + + OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1); + OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */ + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); + + OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2); + OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE | + A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) | + A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff)); + OUT_RING(ring, 0x00000000); /* UNKNOWN_20C3 */ + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1); + OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) | + A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0)); + + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C81, 1); + OUT_RING(ring, 0x00000001); /* UNKNOWN_0C81 */ + + OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1); + OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) | + A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) | + A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF)); + + OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1); + OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) | + A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) | + A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF)); + + OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2); + OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */ + OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */ + + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1); + OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */ + + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1); + OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */ + + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1); + OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */ + + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1); + OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */ + + OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_0E00 */ + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2); + OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) | + A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0)); + OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) | + A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0)); + + OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1); + OUT_RING(ring, 0x00000001); /* UCHE_CACHE_MODE_CONTROL_REG */ + + OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1); + OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */ + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */ + OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */ + + OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ + + OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1); + OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(0) | + A3XX_PA_SC_WINDOW_OFFSET_Y(0)); + + OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4); + OUT_RING(ring, 0x00000000); /* RB_BLEND_RED */ + OUT_RING(ring, 0x00000000); /* RB_BLEND_GREEN */ + OUT_RING(ring, 0x00000000); /* RB_BLEND_BLUE */ + OUT_RING(ring, 0x3c0000ff); /* RB_BLEND_ALPHA */ + + for (i = 0; i < 6; i++) { + OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4); + OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */ + OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */ + OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */ + OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */ + } + + emit_cache_flush(ring); +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h new file mode 100644 index 00000000000..668e5ddd095 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h @@ -0,0 +1,89 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_EMIT_H +#define FD3_EMIT_H + +#include "pipe/p_context.h" + +#include "freedreno_context.h" +#include "fd3_util.h" + + +struct fd_ringbuffer; +enum adreno_state_block; + +void fd3_emit_constant(struct fd_ringbuffer *ring, + enum adreno_state_block sb, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc); + +void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, + struct pipe_surface *psurf); + +/* NOTE: this just exists because we don't have proper vertex/vertexbuf + * state objs for clear, and mem2gmem/gmem2mem operations.. + */ +struct fd3_vertex_buf { + unsigned offset, stride; + struct pipe_resource *prsc; + enum pipe_format format; +}; + +void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, + struct fd_program_stateobj *prog, + struct fd3_vertex_buf *vbufs, uint32_t n); +void fd3_emit_state(struct fd_context *ctx, uint32_t dirty); +void fd3_emit_restore(struct fd_context *ctx); + + +/* use RMW (read-modify-write) to update RB_RENDER_CONTROL since the + * GMEM/binning code is deciding on the bin-width (and whether to + * use binning) after the draw/clear state is emitted. + */ +static inline void +fd3_emit_rbrc_draw_state(struct fd_ringbuffer *ring, uint32_t val) +{ + OUT_PKT3(ring, CP_REG_RMW, 3); + OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL); + OUT_RING(ring, A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK | + A3XX_RB_RENDER_CONTROL_ENABLE_GMEM); + OUT_RING(ring, val); +} + +static inline void +fd3_emit_rbrc_tile_state(struct fd_ringbuffer *ring, uint32_t val) +{ + OUT_PKT3(ring, CP_REG_RMW, 3); + OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL); + OUT_RING(ring, ~(A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK | + A3XX_RB_RENDER_CONTROL_ENABLE_GMEM)); + OUT_RING(ring, val); +} + +#endif /* FD3_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c new file mode 100644 index 00000000000..16ec95972a0 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c @@ -0,0 +1,486 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd3_gmem.h" +#include "fd3_context.h" +#include "fd3_emit.h" +#include "fd3_program.h" +#include "fd3_util.h" +#include "fd3_zsa.h" + + +static void +emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, + struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w) +{ + unsigned i; + + for (i = 0; i < 4; i++) { + enum a3xx_color_fmt format = 0; + enum a3xx_color_swap swap = WZYX; + struct fd_resource *res = NULL; + uint32_t stride = 0; + uint32_t base = 0; + + if (i < nr_bufs) { + struct pipe_surface *psurf = bufs[i]; + struct fd_resource *res = fd_resource(psurf->texture); + + format = fd3_pipe2color(psurf->format); + swap = fd3_pipe2swap(psurf->format); + stride = bin_w * res->cpp; + + if (bases) { + base = bases[i] * res->cpp; + } + } + + OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2); + OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | + A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | + A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE_32X32) | + A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); + OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base)); + + OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1); + OUT_RING(ring, A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(format)); + } +} + +static uint32_t +depth_base(struct fd_gmem_stateobj *gmem) +{ + return align(gmem->bin_w * gmem->bin_h, 0x4000); +} + +/* transfer from gmem to system memory (ie. normal RAM) */ + +static void +emit_gmem2mem_surf(struct fd_ringbuffer *ring, + enum adreno_rb_copy_control_mode mode, + uint32_t base, struct pipe_surface *psurf) +{ + struct fd_resource *rsc = fd_resource(psurf->texture); + + OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4); + OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | + A3XX_RB_COPY_CONTROL_MODE(mode) | + A3XX_RB_COPY_CONTROL_GMEM_BASE(base)); + OUT_RELOCS(ring, rsc->bo, 0, 0, -1); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp)); + OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) | + A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) | + A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | + A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | + A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format))); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 2); /* NumIndices */ +} + +static void +fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + uint32_t bin_w, uint32_t bin_h) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, 0xff000000 | + A3XX_RB_STENCILREFMASK_STENCILREF(0) | + A3XX_RB_STENCILREFMASK_STENCILMASK(0) | + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | + A3XX_RB_STENCILREFMASK_STENCILREF(0) | + A3XX_RB_STENCILREFMASK_STENCILMASK(0) | + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */ + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + + fd3_emit_rbrc_draw_state(ring, + A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1)); + + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, 0); /* VFD_INDEX_MIN */ + OUT_RING(ring, 2); /* VFD_INDEX_MAX */ + OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + + fd3_program_emit(ring, &ctx->solid_prog); + + fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) { + { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, + }, 1); + + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + uint32_t base = depth_base(&ctx->gmem) * + fd_resource(pfb->cbufs[0]->texture)->cpp; + emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf); + } + + if (ctx->resolve & FD_BUFFER_COLOR) { + emit_gmem2mem_surf(ring, RB_COPY_RESOLVE, 0, pfb->cbufs[0]); + } + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); +} + +/* transfer from system memory to gmem */ + +static void +emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base, + struct pipe_surface *psurf, uint32_t bin_w) +{ + emit_mrt(ring, 1, &psurf, &base, bin_w); + + fd3_emit_gmem_restore_tex(ring, psurf); + + OUT_PKT3(ring, CP_DRAW_INDX, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX, + INDEX_SIZE_IGN, IGNORE_VISIBILITY)); + OUT_RING(ring, 2); /* NumIndices */ +} + +static void +fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + uint32_t bin_w, uint32_t bin_h) +{ + struct fd3_context *fd3_ctx = fd3_context(ctx); + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + float x0, y0, x1, y1; + unsigned i; + + /* write texture coordinates to vertexbuf: */ + x0 = ((float)xoff) / ((float)pfb->width); + x1 = ((float)xoff + bin_w) / ((float)pfb->width); + y0 = ((float)yoff) / ((float)pfb->height); + y1 = ((float)yoff + bin_h) / ((float)pfb->height); + + OUT_PKT3(ring, CP_MEM_WRITE, 5); + OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0); + OUT_RING(ring, fui(x0)); + OUT_RING(ring, fui(y0)); + OUT_RING(ring, fui(x1)); + OUT_RING(ring, fui(y1)); + + for (i = 0; i < 4; i++) { + OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(12) | + A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) | + A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf)); + + OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | + A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | + A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | + A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) | + A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE); + } + + fd3_emit_rbrc_tile_state(ring, + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS)); + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER); /* GRAS_CL_CLIP_CNTL */ + + OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0)); + OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) | + A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0)); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) | + A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1)); + + OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, 0x2 | + A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | + A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) | + A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + + fd3_emit_rbrc_draw_state(ring, + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(1)); + + OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) | + A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + + OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4); + OUT_RING(ring, 0); /* VFD_INDEX_MIN */ + OUT_RING(ring, 2); /* VFD_INDEX_MAX */ + OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */ + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + + fd3_program_emit(ring, &ctx->blit_prog); + + fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) { + { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT }, + { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT }, + }, 2); + + /* for gmem pitch/base calculations, we need to use the non- + * truncated tile sizes: + */ + bin_w = gmem->bin_w; + bin_h = gmem->bin_h; + + if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) + emit_mem2gmem_surf(ring, depth_base(gmem), pfb->zsbuf, bin_w); + + if (ctx->restore & FD_BUFFER_COLOR) + emit_mem2gmem_surf(ring, 0, pfb->cbufs[0], bin_w); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A3XX_GRAS_SC_CONTROL_RASTER_MODE(0)); +} + +static void +update_vsc_pipe(struct fd_context *ctx) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct fd_bo *bo = fd3_context(ctx)->vsc_pipe_mem; + int i; + + /* since we aren't using binning, just try to assign all bins + * to same pipe for now: + */ + OUT_PKT0(ring, REG_A3XX_VSC_PIPE(0), 3); + OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(0) | + A3XX_VSC_PIPE_CONFIG_Y(0) | + A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) | + A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y)); + OUT_RELOC(ring, bo, 0, 0); /* VSC_PIPE[0].DATA_ADDRESS */ + OUT_RING(ring, fd_bo_size(bo) - 32); /* VSC_PIPE[0].DATA_LENGTH */ + + for (i = 1; i < 8; i++) { + OUT_PKT0(ring, REG_A3XX_VSC_PIPE(i), 3); + OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(0) | + A3XX_VSC_PIPE_CONFIG_Y(0) | + A3XX_VSC_PIPE_CONFIG_W(0) | + A3XX_VSC_PIPE_CONFIG_H(0)); + OUT_RING(ring, 0x00000000); /* VSC_PIPE[i].DATA_ADDRESS */ + OUT_RING(ring, 0x00000000); /* VSC_PIPE[i].DATA_LENGTH */ + } +} + +/* before first tile */ +static void +fd3_emit_tile_init(struct fd_context *ctx) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + + fd3_emit_restore(ctx); + + /* note: use gmem->bin_w/h, the bin_w/h parameters may be truncated + * at the right and bottom edge tiles + */ + OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1); + OUT_RING(ring, A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | + A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); + + /* TODO we only need to do this if gmem stateobj changes.. or in + * particular if the # of bins changes.. + */ + update_vsc_pipe(ctx); +} + +/* before mem2gmem */ +static void +fd3_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + uint32_t bin_w, uint32_t bin_h) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + uint32_t reg; + + + OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2); + reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem)); + if (pfb->zsbuf) { + reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + } + OUT_RING(ring, reg); + if (pfb->zsbuf) { + uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format); + OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w)); + } else { + OUT_RING(ring, 0x00000000); + } + + OUT_PKT0(ring, REG_A3XX_RB_WINDOW_SIZE, 1); + OUT_RING(ring, A3XX_RB_WINDOW_SIZE_WIDTH(pfb->width) | + A3XX_RB_WINDOW_SIZE_HEIGHT(pfb->height)); + + OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE); +} + +/* before IB to rendering cmds: */ +static void +fd3_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff, + uint32_t bin_w, uint32_t bin_h) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + + uint32_t x1 = xoff; + uint32_t y1 = yoff; + uint32_t x2 = xoff + bin_w - 1; + uint32_t y2 = yoff + bin_h - 1; + + OUT_PKT3(ring, CP_SET_BIN, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); + OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); + + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); + + fd3_emit_rbrc_tile_state(ring, + A3XX_RB_RENDER_CONTROL_ENABLE_GMEM | + A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w)); + + /* setup scissor/offset for current tile: */ + OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1); + OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(xoff) | + A3XX_PA_SC_WINDOW_OFFSET_Y(yoff)); + + OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) | + A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1)); + OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) | + A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2)); +} + +void +fd3_gmem_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->emit_tile_init = fd3_emit_tile_init; + ctx->emit_tile_prep = fd3_emit_tile_prep; + ctx->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem; + ctx->emit_tile_renderprep = fd3_emit_tile_renderprep; + ctx->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h new file mode 100644 index 00000000000..91b02866c84 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_GMEM_H_ +#define FD3_GMEM_H_ + +#include "pipe/p_context.h" + +void fd3_gmem_init(struct pipe_context *pctx); + +#endif /* FD3_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c new file mode 100644 index 00000000000..b5a027e6503 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -0,0 +1,642 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "fd3_program.h" +#include "fd3_compiler.h" +#include "fd3_texture.h" +#include "fd3_util.h" + +static void +delete_shader(struct fd3_shader_stateobj *so) +{ + ir3_shader_destroy(so->ir); + fd_bo_del(so->bo); + free(so); +} + +static void +assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so) +{ + struct fd_context *ctx = fd_context(pctx); + uint32_t sz, *bin; + + bin = ir3_shader_assemble(so->ir, &so->info); + sz = so->info.sizedwords * 4; + + so->bo = fd_bo_new(ctx->screen->dev, sz, + DRM_FREEDRENO_GEM_CACHE_WCOMBINE | + DRM_FREEDRENO_GEM_TYPE_KMEM); + + memcpy(fd_bo_map(so->bo), bin, sz); + + free(bin); + + so->instrlen = so->info.sizedwords / 8; + so->constlen = so->info.max_const + 1; +} + +/* for vertex shader, the inputs are loaded into registers before the shader + * is executed, so max_regs from the shader instructions might not properly + * reflect the # of registers actually used: + */ +static void +fixup_vp_regfootprint(struct fd3_shader_stateobj *so) +{ + unsigned i; + for (i = 0; i < so->inputs_count; i++) { + so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2); + } +} + +static struct fd3_shader_stateobj * +create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso, + enum shader_t type) +{ + struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); + int ret; + + if (!so) + return NULL; + + so->type = type; + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("dump tgsi: type=%d", so->type); + tgsi_dump(cso->tokens, 0); + } + + if (type == SHADER_FRAGMENT) { + /* we seem to get wrong colors (maybe swap/endianess or hw issue?) + * with full precision color reg. And blob driver only seems to + * use half precision register for color output (that I can find + * so far), even with highp precision. So for force half precision + * for frag shader: + */ + so->half_precision = true; + } + + ret = fd3_compile_shader(so, cso->tokens); + if (ret) { + debug_error("compile failed!"); + goto fail; + } + + assemble_shader(pctx, so); + if (!so->bo) { + debug_error("assemble failed!"); + goto fail; + } + + if (type == SHADER_VERTEX) + fixup_vp_regfootprint(so); + + if (fd_mesa_debug & FD_DBG_DISASM) { + DBG("disassemble: type=%d", so->type); + disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type); + } + + return so; + +fail: + delete_shader(so); + return NULL; +} + +static void * +fd3_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader(pctx, cso, SHADER_FRAGMENT); +} + +static void +fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd3_shader_stateobj *so = hwcso; + delete_shader(so); +} + +static void +fd3_fp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->prog.fp = hwcso; + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + ctx->dirty |= FD_DIRTY_PROG; +} + +static void * +fd3_vp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader(pctx, cso, SHADER_VERTEX); +} + +static void +fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd3_shader_stateobj *so = hwcso; + delete_shader(so); +} + +static void +fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->prog.vp = hwcso; + ctx->prog.dirty |= FD_SHADER_DIRTY_VP; + ctx->dirty |= FD_DIRTY_PROG; +} + +static void +emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so) +{ + struct ir3_shader_info *si = &so->info; + enum adreno_state_block sb; + uint32_t i, *bin; + + if (so->type == SHADER_VERTEX) { + sb = SB_VERT_SHADER; + } else { + sb = SB_FRAG_SHADER; + } + + // XXX use SS_INDIRECT + bin = fd_bo_map(so->bo); + OUT_PKT3(ring, CP_LOAD_STATE, 2 + si->sizedwords); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + for (i = 0; i < si->sizedwords; i++) + OUT_RING(ring, bin[i]); +} + +void +fd3_program_emit(struct fd_ringbuffer *ring, + struct fd_program_stateobj *prog) +{ + struct fd3_shader_stateobj *vp = prog->vp; + struct fd3_shader_stateobj *fp = prog->fp; + struct ir3_shader_info *vsi = &vp->info; + struct ir3_shader_info *fsi = &fp->info; + int i; + + /* we could probably divide this up into things that need to be + * emitted if frag-prog is dirty vs if vert-prog is dirty.. + */ + + OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); + OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | + A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); + OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | + A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE); + OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); + OUT_RING(ring, 0x00000000); /* HLSQ_CONTROL_3_REG */ + OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | + A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | + A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen)); + OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) | + A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) | + A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fp->instrlen)); + + OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); + OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) | + A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | + // XXX "resolve" (?) bit set on gmem->mem pass.. +// COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) | + // XXX sometimes 0, sometimes 1: + A3XX_SP_SP_CTRL_REG_LOMODE(1)); + + /* emit unknown sequence of perfcounter disables that the blob + * emits as part of the program state.. + */ + for (i = 0; i < 6; i++) { + OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ + + OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1); + OUT_RING(ring, 0x00000000); /* SP_PERFCOUNTER4_SELECT */ + } + + OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1); + OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen)); + + OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3); + OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | + A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) | + A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) | + A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) | + A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | + A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | + COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | + A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen)); + OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | + A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | + A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0))); + OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(vp->pos_regid) | + A3XX_SP_VS_PARAM_REG_PSIZEREGID(vp->psize_regid) | + A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(vp->outputs_count)); + + assert(vp->outputs_count >= fp->inputs_count); + + for (i = 0; i < fp->inputs_count; ) { + uint32_t reg = 0; + + OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1); + + reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[i].regid); + reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[i].compmask); + i++; + + reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[i].regid); + reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[i].compmask); + i++; + + OUT_RING(ring, reg); + } + + for (i = 0; i < fp->inputs_count; ) { + uint32_t reg = 0; + + OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1); + + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[i++].inloc); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[i++].inloc); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[i++].inloc); + reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[i++].inloc); + + OUT_RING(ring, reg); + } + +#if 0 + /* for some reason, when I write SP_{VS,FS}_OBJ_START_REG I get: +[ 666.663665] kgsl kgsl-3d0: |a3xx_err_callback| RBBM | AHB bus error | READ | addr=201 | ports=1:3 +[ 666.664001] kgsl kgsl-3d0: |a3xx_err_callback| ringbuffer AHB error interrupt +[ 670.680909] kgsl kgsl-3d0: |adreno_idle| spun too long waiting for RB to idle +[ 670.681062] kgsl kgsl-3d0: |kgsl-3d0| Dump Started +[ 670.681123] kgsl kgsl-3d0: POWER: FLAGS = 00000007 | ACTIVE POWERLEVEL = 00000001 +[ 670.681214] kgsl kgsl-3d0: POWER: INTERVAL TIMEOUT = 0000000A +[ 670.681367] kgsl kgsl-3d0: GRP_CLK = 325000000 +[ 670.681489] kgsl kgsl-3d0: BUS CLK = 0 + */ + OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | + A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); + OUT_RELOC(ring, vp->bo, 0, 0); /* SP_VS_OBJ_START_REG */ +#endif + + OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); + OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); + + OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); + OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | + A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) | + A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | + A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | + A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | + A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | + A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | + COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | + A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen)); + OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | + A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | + A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) | + A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); + +#if 0 + OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | + A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen)); + OUT_RELOC(ring, fp->bo, 0, 0); /* SP_FS_OBJ_START_REG */ +#endif + + OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); + OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ + OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ + + OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); + OUT_RING(ring, 0x00000000); /* SP_FS_OUTPUT_REG */ + + OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); + OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(fp->color_regid) | + COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION)); + OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0)); + + OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); + OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | + A3XX_VPC_ATTR_THRDASSIGN(1) | + A3XX_VPC_ATTR_LMSIZE(1)); + OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) | + A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); + + OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); + OUT_RING(ring, fp->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ + OUT_RING(ring, fp->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ + OUT_RING(ring, fp->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ + OUT_RING(ring, fp->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ + + OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); + OUT_RING(ring, fp->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ + OUT_RING(ring, fp->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ + OUT_RING(ring, fp->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ + OUT_RING(ring, fp->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ + + OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); + OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) | + A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252)); + + emit_shader(ring, vp); + + OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ + + emit_shader(ring, fp); + + OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); + OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ + + OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2); + OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) | + A3XX_VFD_CONTROL_0_PACKETSIZE(2) | + A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) | + A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count)); + OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX + A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) | + A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0))); +} + +/* once the compiler is good enough, we should construct TGSI in the + * core freedreno driver, and then let the a2xx/a3xx parts compile + * the internal shaders from TGSI the same as regular shaders. This + * would be the first step towards handling most of clear (and the + * gmem<->mem blits) from the core via normal state changes and shader + * state objects. + * + * (Well, there would still be some special bits, because there are + * some registers that don't get set for normal draw, but this should + * be relatively small and could be handled via callbacks from core + * into a2xx/a3xx..) + */ +static struct fd3_shader_stateobj * +create_internal_shader(struct pipe_context *pctx, enum shader_t type, + struct ir3_shader *ir) +{ + struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); + + if (!so) { + ir3_shader_destroy(ir); + return NULL; + } + + so->type = type; + so->ir = ir; + + assemble_shader(pctx, so); + assert(so->bo); + + return so; +} + +/* Creates shader: + * (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x + * (rpt5)nop + * sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 + * (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x + * end + */ +static struct fd3_shader_stateobj * +create_blit_fp(struct pipe_context *pctx) +{ + struct fd3_shader_stateobj *so; + struct ir3_shader *ir = ir3_shader_create(); + struct ir3_instruction *instr; + + /* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */ + instr = ir3_instr_create(ir, 2, OPC_BARY_F); + instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; + instr->repeat = 1; + + ir3_reg_create(instr, regid(0,2), IR3_REG_EI); /* (ei)r0.z */ + ir3_reg_create(instr, 0, IR3_REG_R | /* (r)0 */ + IR3_REG_IMMED)->iim_val = 0; + ir3_reg_create(instr, regid(0,0), 0); /* r0.x */ + + /* (rpt5)nop */ + instr = ir3_instr_create(ir, 0, OPC_NOP); + instr->repeat = 5; + + /* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */ + instr = ir3_instr_create(ir, 5, OPC_SAM); + instr->cat5.samp = 0; + instr->cat5.tex = 0; + instr->cat5.type = TYPE_F32; + + ir3_reg_create(instr, regid(0,0), /* (xyzw)r0.x */ + 0)->wrmask = 0xf; + ir3_reg_create(instr, regid(0,2), 0); /* r0.z */ + + /* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */ + instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */ + instr->flags = IR3_INSTR_SY; + instr->repeat = 3; + instr->cat1.src_type = TYPE_F32; + instr->cat1.dst_type = TYPE_F16; + + ir3_reg_create(instr, regid(0,0), IR3_REG_HALF); /* hr0.x */ + ir3_reg_create(instr, regid(0,0), IR3_REG_R); /* (r)r0.x */ + + /* end */ + instr = ir3_instr_create(ir, 0, OPC_END); + + so = create_internal_shader(pctx, SHADER_FRAGMENT, ir); + if (!so) + return NULL; + + so->color_regid = regid(0,0); + so->half_precision = true; + so->inputs_count = 1; + so->inputs[0].inloc = 8; + so->inputs[0].compmask = 0x3; + so->total_in = 2; + so->samplers_count = 1; + + so->vpsrepl[0] = 0x99999999; + so->vpsrepl[1] = 0x99999999; + so->vpsrepl[2] = 0x99999999; + so->vpsrepl[3] = 0x99999999; + + return so; +} + +/* Creates shader: + * (sy)(ss)end + */ +static struct fd3_shader_stateobj * +create_blit_vp(struct pipe_context *pctx) +{ + struct fd3_shader_stateobj *so; + struct ir3_shader *ir = ir3_shader_create(); + struct ir3_instruction *instr; + + /* (sy)(ss)end */ + instr = ir3_instr_create(ir, 0, OPC_END); + instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; + + so = create_internal_shader(pctx, SHADER_VERTEX, ir); + if (!so) + return NULL; + + so->pos_regid = regid(1,0); + so->psize_regid = regid(63,0); + so->inputs_count = 2; + so->inputs[0].regid = regid(0,0); + so->inputs[0].compmask = 0xf; + so->inputs[1].regid = regid(1,0); + so->inputs[1].compmask = 0xf; + so->total_in = 8; + so->outputs_count = 1; + so->outputs[0].regid = regid(0,0); + + fixup_vp_regfootprint(so); + + return so; +} + +/* Creates shader: + * (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x + * end + */ +static struct fd3_shader_stateobj * +create_solid_fp(struct pipe_context *pctx) +{ + struct fd3_shader_stateobj *so; + struct ir3_shader *ir = ir3_shader_create(); + struct ir3_instruction *instr; + + /* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */ + instr = ir3_instr_create(ir, 1, 0); /* mov/cov instructions have no opc */ + instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; + instr->repeat = 3; + instr->cat1.src_type = TYPE_F16; + instr->cat1.dst_type = TYPE_F16; + + ir3_reg_create(instr, regid(0,0), IR3_REG_HALF); /* hr0.x */ + ir3_reg_create(instr, regid(0,0), IR3_REG_HALF | /* (r)hc0.x */ + IR3_REG_CONST | IR3_REG_R); + + /* end */ + instr = ir3_instr_create(ir, 0, OPC_END); + + so = create_internal_shader(pctx, SHADER_FRAGMENT, ir); + if (!so) + return NULL; + + so->color_regid = regid(0,0); + so->half_precision = true; + so->inputs_count = 0; + so->total_in = 0; + + return so; +} + +/* Creates shader: + * (sy)(ss)end + */ +static struct fd3_shader_stateobj * +create_solid_vp(struct pipe_context *pctx) +{ + struct fd3_shader_stateobj *so; + struct ir3_shader *ir = ir3_shader_create(); + struct ir3_instruction *instr; + + /* (sy)(ss)end */ + instr = ir3_instr_create(ir, 0, OPC_END); + instr->flags = IR3_INSTR_SY | IR3_INSTR_SS; + + + so = create_internal_shader(pctx, SHADER_VERTEX, ir); + if (!so) + return NULL; + + so->pos_regid = regid(0,0); + so->psize_regid = regid(63,0); + so->inputs_count = 1; + so->inputs[0].regid = regid(0,0); + so->inputs[0].compmask = 0xf; + so->total_in = 4; + so->outputs_count = 0; + + fixup_vp_regfootprint(so); + + return so; +} + +void +fd3_prog_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + pctx->create_fs_state = fd3_fp_state_create; + pctx->bind_fs_state = fd3_fp_state_bind; + pctx->delete_fs_state = fd3_fp_state_delete; + + pctx->create_vs_state = fd3_vp_state_create; + pctx->bind_vs_state = fd3_vp_state_bind; + pctx->delete_vs_state = fd3_vp_state_delete; + + ctx->solid_prog.fp = create_solid_fp(pctx); + ctx->solid_prog.vp = create_solid_vp(pctx); + ctx->blit_prog.fp = create_blit_fp(pctx); + ctx->blit_prog.vp = create_blit_vp(pctx); +} + +void +fd3_prog_fini(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + delete_shader(ctx->solid_prog.vp); + delete_shader(ctx->solid_prog.fp); + delete_shader(ctx->blit_prog.vp); + delete_shader(ctx->blit_prog.fp); +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h new file mode 100644 index 00000000000..9b50d34f756 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -0,0 +1,116 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_PROGRAM_H_ +#define FD3_PROGRAM_H_ + +#include "pipe/p_context.h" + +#include "freedreno_context.h" + +#include "ir-a3xx.h" +#include "disasm.h" + +struct fd3_shader_stateobj { + enum shader_t type; + + struct fd_bo *bo; + + struct ir3_shader_info info; + struct ir3_shader *ir; + + /* is shader using (or more precisely, is color_regid) half- + * precision register? + */ + bool half_precision; + + /* special output register locations: */ + uint8_t pos_regid, psize_regid, color_regid; + + /* the instructions length is in units of instruction groups + * (4 instructions, 8 dwords): + */ + unsigned instrlen; + + /* the constants length is in units of vec4's, and is the sum of + * the uniforms and the built-in compiler constants + */ + unsigned constlen; + + /* About Linkage: + * + Let the frag shader determine the position/compmask for the + * varyings, since it is the place where we know if the varying + * is actually used, and if so, which components are used. So + * what the hw calls "outloc" is taken from the "inloc" of the + * frag shader. + * + From the vert shader, we only need the output regid + */ + + /* varyings/outputs: */ + unsigned outputs_count; + struct { + uint8_t regid; + } outputs[16]; + + /* vertices/inputs: */ + unsigned inputs_count; + struct { + uint8_t regid; + uint8_t compmask; + /* in theory inloc of fs should match outloc of vs: */ + uint8_t inloc; + } inputs[16]; + + unsigned total_in; /* sum of inputs (scalar) */ + + /* samplers: */ + unsigned samplers_count; + + /* const reg # of first immediate, ie. 1 == c1 + * (not regid, because TGSI thinks in terms of vec4 registers, + * not scalar registers) + */ + unsigned first_immediate; + unsigned immediates_count; + struct { + uint32_t val[4]; + } immediates[64]; + + /* so far, only used for blit_prog shader.. values for + * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE + */ + uint32_t vinterp[4], vpsrepl[4]; +}; + +void fd3_program_emit(struct fd_ringbuffer *ring, + struct fd_program_stateobj *prog); + +void fd3_prog_init(struct pipe_context *pctx); +void fd3_prog_fini(struct pipe_context *pctx); + +#endif /* FD3_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c new file mode 100644 index 00000000000..8f6c0fe95cf --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c @@ -0,0 +1,92 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd3_rasterizer.h" +#include "fd3_context.h" +#include "fd3_util.h" + +void * +fd3_rasterizer_state_create(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct fd3_rasterizer_stateobj *so; + + so = CALLOC_STRUCT(fd3_rasterizer_stateobj); + if (!so) + return NULL; + + so->base = *cso; + +/* + if (cso->line_stipple_enable) { + ??? TODO line stipple + } + TODO cso->half_pixel_center + TODO cso->point_size + TODO psize_min/psize_max +*/ + so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */ + so->gras_su_point_minmax = 0xffc00010; /* ??? */ + so->gras_su_point_size = 0x00000008; /* ??? */ + so->gras_su_poly_offset_scale = + A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale); + so->gras_su_poly_offset_offset = + A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units); + + so->gras_su_mode_control = + A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2); + + so->pc_prim_vtx_cntl = + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) | + A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back)); + + if (cso->cull_face & PIPE_FACE_FRONT) + so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; + if (cso->cull_face & PIPE_FACE_BACK) + so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK; + if (!cso->flatshade_first) + so->pc_prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST; +/* + if (!cso->front_ccw) + TODO + if (cso->line_stipple_enable) + TODO + if (cso->multisample) + TODO +*/ + + if (cso->offset_tri) + so->gras_su_mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; + + return so; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h new file mode 100644 index 00000000000..7e9c1f51f59 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h @@ -0,0 +1,56 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_RASTERIZER_H_ +#define FD3_RASTERIZER_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +struct fd3_rasterizer_stateobj { + struct pipe_rasterizer_state base; + uint32_t gras_su_point_minmax; + uint32_t gras_su_point_size; + uint32_t gras_su_poly_offset_scale; + uint32_t gras_su_poly_offset_offset; + + uint32_t gras_su_mode_control; + uint32_t gras_cl_clip_cntl; + uint32_t pc_prim_vtx_cntl; +}; + +static INLINE struct fd3_rasterizer_stateobj * +fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast) +{ + return (struct fd3_rasterizer_stateobj *)rast; +} + +void * fd3_rasterizer_state_create(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso); + +#endif /* FD3_RASTERIZER_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c new file mode 100644 index 00000000000..9bb19abc2a8 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c @@ -0,0 +1,105 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_screen.h" +#include "util/u_format.h" + +#include "fd3_screen.h" +#include "fd3_context.h" +#include "fd3_util.h" + +static boolean +fd3_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + (sample_count > 1) || /* TODO add MSAA */ + !util_format_is_supported(format, usage)) { + DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", + util_format_name(format), target, sample_count, usage); + return FALSE; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + (fd3_pipe2vtx(format) != ~0)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + (fd3_pipe2tex(format) != ~0)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + (fd3_pipe2color(format) != ~0) && + (fd3_pipe2tex(format) != ~0)) { + retval |= usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + (fd_pipe2depth(format) != ~0) && + (fd3_pipe2tex(format) != ~0)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + (fd_pipe2index(format) != ~0)) { + retval |= PIPE_BIND_INDEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + if (retval != usage) { + DBG("not supported: format=%s, target=%d, sample_count=%d, " + "usage=%x, retval=%x", util_format_name(format), + target, sample_count, usage, retval); + } + + return retval == usage; +} + +void +fd3_screen_init(struct pipe_screen *pscreen) +{ + pscreen->context_create = fd3_context_create; + pscreen->is_format_supported = fd3_screen_is_format_supported; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.h b/src/gallium/drivers/freedreno/a3xx/fd3_screen.h new file mode 100644 index 00000000000..38204d387a3 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_screen.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_SCREEN_H_ +#define FD3_SCREEN_H_ + +#include "pipe/p_screen.h" + +void fd3_screen_init(struct pipe_screen *pscreen); + +#endif /* FD3_SCREEN_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c new file mode 100644 index 00000000000..ae08b8ac90a --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c @@ -0,0 +1,140 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" + +#include "fd3_texture.h" +#include "fd3_util.h" + +static enum a3xx_tex_clamp +tex_clamp(unsigned wrap) +{ + /* hardware probably supports more, but we can't coax all the + * wrap/clamp modes out of the GLESv2 blob driver. + * + * TODO once we have basics working, go back and just try + * different values and see what happens + */ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return A3XX_TEX_REPEAT; + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return A3XX_TEX_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return A3XX_TEX_MIRROR_REPEAT; + default: + DBG("invalid wrap: %u", wrap); + return 0; + } +} + +static enum a3xx_tex_filter +tex_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return A3XX_TEX_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return A3XX_TEX_LINEAR; + default: + DBG("invalid filter: %u", filter); + return 0; + } +} + +static void * +fd3_sampler_state_create(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj); + + if (!so) + return NULL; + + so->base = *cso; + + so->texsamp0 = + A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) | + A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) | + A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) | + A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) | + A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r)); + so->texsamp1 = 0x00000000; /* ??? */ + + return so; +} + +static struct pipe_sampler_view * +fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view); + struct fd_resource *rsc = fd_resource(prsc); + + if (!so) + return NULL; + + so->base = *cso; + pipe_reference(NULL, &prsc->reference); + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + so->tex_resource = rsc; + + so->texconst0 = + 0x40000000 | /* ??? */ + A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) | + fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); + so->texconst1 = + A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) | + A3XX_TEX_CONST_1_WIDTH(prsc->width0) | + A3XX_TEX_CONST_1_HEIGHT(prsc->height0); + /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */ + so->texconst2 = + A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp); + so->texconst3 = 0x00000000; /* ??? */ + + return &so->base; +} + +void +fd3_texture_init(struct pipe_context *pctx) +{ + pctx->create_sampler_state = fd3_sampler_state_create; + pctx->create_sampler_view = fd3_sampler_view_create; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h new file mode 100644 index 00000000000..a83f527366b --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h @@ -0,0 +1,68 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_TEXTURE_H_ +#define FD3_TEXTURE_H_ + +#include "pipe/p_context.h" + +#include "freedreno_texture.h" +#include "freedreno_resource.h" + +#include "fd3_context.h" +#include "fd3_util.h" + +struct fd3_sampler_stateobj { + struct pipe_sampler_state base; + uint32_t texsamp0, texsamp1; +}; + +static INLINE struct fd3_sampler_stateobj * +fd3_sampler_stateobj(struct pipe_sampler_state *samp) +{ + return (struct fd3_sampler_stateobj *)samp; +} + +struct fd3_pipe_sampler_view { + struct pipe_sampler_view base; + struct fd_resource *tex_resource; + uint32_t texconst0, texconst1, texconst2, texconst3; +}; + +static INLINE struct fd3_pipe_sampler_view * +fd3_pipe_sampler_view(struct pipe_sampler_view *pview) +{ + return (struct fd3_pipe_sampler_view *)pview; +} + +unsigned fd3_get_const_idx(struct fd_context *ctx, + struct fd_texture_stateobj *tex, unsigned samp_id); + +void fd3_texture_init(struct pipe_context *pctx); + +#endif /* FD3_TEXTURE_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c new file mode 100644 index 00000000000..a08bc2349eb --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.c @@ -0,0 +1,348 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#include "pipe/p_defines.h" +#include "util/u_format.h" + +#include "fd3_util.h" + +/* convert pipe format to vertex buffer format: */ +enum a3xx_vtx_fmt +fd3_pipe2vtx(enum pipe_format format) +{ + switch (format) { + /* 8-bit buffers. */ + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_L8_SRGB: + return VFMT_NORM_UBYTE_8; + + case PIPE_FORMAT_A8_SNORM: + case PIPE_FORMAT_I8_SNORM: + case PIPE_FORMAT_L8_SNORM: + case PIPE_FORMAT_R8_SNORM: + return VFMT_NORM_BYTE_8; + + case PIPE_FORMAT_A8_UINT: + case PIPE_FORMAT_I8_UINT: + case PIPE_FORMAT_L8_UINT: + case PIPE_FORMAT_R8_UINT: + return VFMT_UBYTE_8; + + case PIPE_FORMAT_A8_SINT: + case PIPE_FORMAT_I8_SINT: + case PIPE_FORMAT_L8_SINT: + case PIPE_FORMAT_R8_SINT: + return VFMT_BYTE_8; + + /* 16-bit buffers. */ + case PIPE_FORMAT_R16_UNORM: + case PIPE_FORMAT_A16_UNORM: + case PIPE_FORMAT_L16_UNORM: + case PIPE_FORMAT_I16_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return VFMT_NORM_USHORT_16; + + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_A16_SNORM: + case PIPE_FORMAT_L16_SNORM: + case PIPE_FORMAT_I16_SNORM: + return VFMT_NORM_SHORT_16; + + case PIPE_FORMAT_R16_UINT: + case PIPE_FORMAT_A16_UINT: + case PIPE_FORMAT_L16_UINT: + case PIPE_FORMAT_I16_UINT: + return VFMT_USHORT_16; + + case PIPE_FORMAT_R16_SINT: + case PIPE_FORMAT_A16_SINT: + case PIPE_FORMAT_L16_SINT: + case PIPE_FORMAT_I16_SINT: + return VFMT_SHORT_16; + + case PIPE_FORMAT_L8A8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + return VFMT_NORM_UBYTE_8_8; + + case PIPE_FORMAT_L8A8_SNORM: + case PIPE_FORMAT_R8G8_SNORM: + return VFMT_NORM_BYTE_8_8; + + case PIPE_FORMAT_L8A8_UINT: + case PIPE_FORMAT_R8G8_UINT: + return VFMT_UBYTE_8_8; + + case PIPE_FORMAT_L8A8_SINT: + case PIPE_FORMAT_R8G8_SINT: + return VFMT_BYTE_8_8; + + /* 24-bit buffers. */ + case PIPE_FORMAT_R8G8B8_UNORM: + return VFMT_NORM_UBYTE_8_8_8; + + case PIPE_FORMAT_R8G8B8_SNORM: + return VFMT_NORM_BYTE_8_8_8; + + case PIPE_FORMAT_R8G8B8_UINT: + return VFMT_UBYTE_8_8_8; + + case PIPE_FORMAT_R8G8B8_SINT: + return VFMT_BYTE_8_8_8; + + /* 32-bit buffers. */ + case PIPE_FORMAT_A8B8G8R8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R8G8B8X8_UNORM: + case PIPE_FORMAT_X8B8G8R8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8B8G8R8_SRGB: + case PIPE_FORMAT_B8G8R8A8_SRGB: + return VFMT_NORM_UBYTE_8_8_8_8; + + case PIPE_FORMAT_R8G8B8A8_SNORM: + case PIPE_FORMAT_R8G8B8X8_SNORM: + return VFMT_NORM_BYTE_8_8_8_8; + + case PIPE_FORMAT_R8G8B8A8_UINT: + case PIPE_FORMAT_R8G8B8X8_UINT: + return VFMT_UBYTE_8_8_8_8; + + case PIPE_FORMAT_R8G8B8A8_SINT: + case PIPE_FORMAT_R8G8B8X8_SINT: + return VFMT_BYTE_8_8_8_8; + +/* TODO probably need gles3 blob drivers to find the 32bit int formats: + case PIPE_FORMAT_R32_UINT: + case PIPE_FORMAT_R32_SINT: + case PIPE_FORMAT_A32_UINT: + case PIPE_FORMAT_A32_SINT: + case PIPE_FORMAT_L32_UINT: + case PIPE_FORMAT_L32_SINT: + case PIPE_FORMAT_I32_UINT: + case PIPE_FORMAT_I32_SINT: +*/ + + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_A32_FLOAT: + case PIPE_FORMAT_L32_FLOAT: + case PIPE_FORMAT_I32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT: + return VFMT_FLOAT_32; + + case PIPE_FORMAT_R32_FIXED: + return VFMT_FIXED_32; + + /* 64-bit buffers. */ + case PIPE_FORMAT_R16G16B16A16_UNORM: + return VFMT_NORM_USHORT_16_16_16_16; + + case PIPE_FORMAT_R16G16B16A16_SNORM: + return VFMT_NORM_SHORT_16_16_16_16; + + case PIPE_FORMAT_R16G16B16A16_UINT: + return VFMT_USHORT_16_16_16_16; + + case PIPE_FORMAT_R16G16B16A16_SINT: + return VFMT_SHORT_16_16_16_16; + + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_L32A32_FLOAT: + return VFMT_FLOAT_32_32; + + case PIPE_FORMAT_R32G32_FIXED: + return VFMT_FIXED_32_32; + +/* TODO probably need gles3 blob drivers to find the 32bit int formats: + case PIPE_FORMAT_R32G32_SINT: + case PIPE_FORMAT_R32G32_UINT: + case PIPE_FORMAT_L32A32_UINT: + case PIPE_FORMAT_L32A32_SINT: +*/ + + /* 96-bit buffers. */ + case PIPE_FORMAT_R32G32B32_FLOAT: + return VFMT_FLOAT_32_32_32; + + case PIPE_FORMAT_R32G32B32_FIXED: + return VFMT_FIXED_32_32_32; + + /* 128-bit buffers. */ + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return VFMT_FLOAT_32_32_32_32; + + case PIPE_FORMAT_R32G32B32A32_FIXED: + return VFMT_FIXED_32_32_32_32; + +/* TODO probably need gles3 blob drivers to find the 32bit int formats: + case PIPE_FORMAT_R32G32B32A32_SNORM: + case PIPE_FORMAT_R32G32B32A32_UNORM: + case PIPE_FORMAT_R32G32B32A32_SINT: + case PIPE_FORMAT_R32G32B32A32_UINT: +*/ + + default: + return ~0; + } +} + +/* convert pipe format to texture sampler format: */ +enum a3xx_tex_fmt +fd3_pipe2tex(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return TFMT_NORM_UINT_8; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return TFMT_NORM_UINT_8_8_8_8; + + case PIPE_FORMAT_Z24X8_UNORM: + return TFMT_NORM_UINT_X8Z24; + + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return TFMT_NORM_UINT_8_8_8_8; + + case PIPE_FORMAT_Z16_UNORM: + return TFMT_NORM_UINT_8_8; + + // TODO add more.. + + default: + return ~0; + } +} + +enum a3xx_tex_fetchsize +fd3_pipe2fetchsize(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return TFETCH_1_BYTE; + + case PIPE_FORMAT_Z16_UNORM: + return TFETCH_2_BYTE; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return TFETCH_4_BYTE; + + // TODO add more.. + + default: + return TFETCH_DISABLE; /* save default */ + } +} + +/* convert pipe format to MRT / copydest format used for render-target: */ +enum a3xx_color_fmt +fd3_pipe2color(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return RB_R8G8B8A8_UNORM; + + case PIPE_FORMAT_Z16_UNORM: + return RB_Z16_UNORM; + + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + /* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */ + return RB_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_L8_UNORM: + return RB_A8_UNORM; + + // TODO add more.. + + default: + return ~0; + } +} + +enum a3xx_color_swap +fd3_pipe2swap(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: + return WXYZ; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + return WZYX; + + default: + return WZYX; + } +} + +static inline enum a3xx_tex_swiz +tex_swiz(unsigned swiz) +{ + switch (swiz) { + default: + case PIPE_SWIZZLE_RED: return A3XX_TEX_X; + case PIPE_SWIZZLE_GREEN: return A3XX_TEX_Y; + case PIPE_SWIZZLE_BLUE: return A3XX_TEX_Z; + case PIPE_SWIZZLE_ALPHA: return A3XX_TEX_W; + case PIPE_SWIZZLE_ZERO: return A3XX_TEX_ZERO; + case PIPE_SWIZZLE_ONE: return A3XX_TEX_ONE; + } +} + +uint32_t +fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, + unsigned swizzle_b, unsigned swizzle_a) +{ + const struct util_format_description *desc = + util_format_description(format); + uint8_t swiz[] = { + swizzle_r, swizzle_g, swizzle_b, swizzle_a, + PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE, + PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE, + }; + + return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(swiz[desc->swizzle[0]])) | + A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(swiz[desc->swizzle[1]])) | + A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(swiz[desc->swizzle[2]])) | + A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(swiz[desc->swizzle[3]])); +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h new file mode 100644 index 00000000000..e9ec15f5a3f --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_util.h @@ -0,0 +1,56 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_UTIL_H_ +#define FD3_UTIL_H_ + +#include "freedreno_util.h" + +#include "a3xx.xml.h" + +enum a3xx_vtx_fmt fd3_pipe2vtx(enum pipe_format format); +enum a3xx_tex_fmt fd3_pipe2tex(enum pipe_format format); +enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format); +enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format); +enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format); + +uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, + unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); + +/* comp: + * 0 - x + * 1 - y + * 2 - z + * 3 - w + */ +static inline uint32_t regid(int num, int comp) +{ + return (num << 2) | (comp & 0x3); +} + +#endif /* FD3_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c new file mode 100644 index 00000000000..857ab8f106a --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c @@ -0,0 +1,100 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd3_zsa.h" +#include "fd3_context.h" +#include "fd3_util.h" + +void * +fd3_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct fd3_zsa_stateobj *so; + + so = CALLOC_STRUCT(fd3_zsa_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->rb_depth_control |= + A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */ + + if (cso->depth.enabled) + so->rb_depth_control |= + A3XX_RB_DEPTH_CONTROL_Z_ENABLE | + A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE; + + if (cso->depth.writemask) + so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE; + + if (cso->stencil[0].enabled) { + const struct pipe_stencil_state *s = &cso->stencil[0]; + + so->rb_stencil_control |= + A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ + A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | + A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | + A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); + so->rb_stencilrefmask |= + 0xff000000 | /* ??? */ + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | + A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); + + if (cso->stencil[1].enabled) { + const struct pipe_stencil_state *bs = &cso->stencil[1]; + + so->rb_stencil_control |= + A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ + A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | + A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | + A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); + so->rb_stencilrefmask_bf |= + 0xff000000 | /* ??? */ + A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) | + A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask); + } + } + + if (cso->alpha.enabled) { + so->rb_render_control = + A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func); + // TODO alpha_ref and alpha_test_enable?? + } + + so->rb_render_control |= 0x2000; /* ??? */ + + return so; +} diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h new file mode 100644 index 00000000000..0cc80a8cf0a --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h @@ -0,0 +1,56 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark <[email protected]> + */ + +#ifndef FD3_ZSA_H_ +#define FD3_ZSA_H_ + + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd3_zsa_stateobj { + struct pipe_depth_stencil_alpha_state base; + uint32_t rb_render_control; + uint32_t rb_depth_control; + uint32_t rb_stencil_control; + uint32_t rb_stencilrefmask; + uint32_t rb_stencilrefmask_bf; +}; + +static INLINE struct fd3_zsa_stateobj * +fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) +{ + return (struct fd3_zsa_stateobj *)zsa; +} + +void * fd3_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso); + +#endif /* FD3_ZSA_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h new file mode 100644 index 00000000000..464a7e9d757 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h @@ -0,0 +1,532 @@ +/* + * Copyright (c) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef INSTR_A3XX_H_ +#define INSTR_A3XX_H_ + +#define PACKED __attribute__((__packed__)) + +#include <stdint.h> +#include <assert.h> + +typedef enum { + /* category 0: */ + OPC_NOP = 0, + OPC_BR = 1, + OPC_JUMP = 2, + OPC_CALL = 3, + OPC_RET = 4, + OPC_KILL = 5, + OPC_END = 6, + OPC_EMIT = 7, + OPC_CUT = 8, + OPC_CHMASK = 9, + OPC_CHSH = 10, + OPC_FLOW_REV = 11, + + /* category 1: */ + /* no opc.. all category 1 are variants of mov */ + + /* category 2: */ + OPC_ADD_F = 0, + OPC_MIN_F = 1, + OPC_MAX_F = 2, + OPC_MUL_F = 3, + OPC_SIGN_F = 4, + OPC_CMPS_F = 5, + OPC_ABSNEG_F = 6, + OPC_CMPV_F = 7, + /* 8 - invalid */ + OPC_FLOOR_F = 9, + OPC_CEIL_F = 10, + OPC_RNDNE_F = 11, + OPC_RNDAZ_F = 12, + OPC_TRUNC_F = 13, + /* 14-15 - invalid */ + OPC_ADD_U = 16, + OPC_ADD_S = 17, + OPC_SUB_U = 18, + OPC_SUB_S = 19, + OPC_CMPS_U = 20, + OPC_CMPS_S = 21, + OPC_MIN_U = 22, + OPC_MIN_S = 23, + OPC_MAX_U = 24, + OPC_MAX_S = 25, + OPC_ABSNEG_S = 26, + /* 27 - invalid */ + OPC_AND_B = 28, + OPC_OR_B = 29, + OPC_NOT_B = 30, + OPC_XOR_B = 31, + /* 32 - invalid */ + OPC_CMPV_U = 33, + OPC_CMPV_S = 34, + /* 35-47 - invalid */ + OPC_MUL_U = 48, + OPC_MUL_S = 49, + OPC_MULL_U = 50, + OPC_BFREV_B = 51, + OPC_CLZ_S = 52, + OPC_CLZ_B = 53, + OPC_SHL_B = 54, + OPC_SHR_B = 55, + OPC_ASHR_B = 56, + OPC_BARY_F = 57, + OPC_MGEN_B = 58, + OPC_GETBIT_B = 59, + OPC_SETRM = 60, + OPC_CBITS_B = 61, + OPC_SHB = 62, + OPC_MSAD = 63, + + /* category 3: */ + OPC_MAD_U16 = 0, + OPC_MADSH_U16 = 1, + OPC_MAD_S16 = 2, + OPC_MADSH_M16 = 3, /* should this be .s16? */ + OPC_MAD_U24 = 4, + OPC_MAD_S24 = 5, + OPC_MAD_F16 = 6, + OPC_MAD_F32 = 7, + OPC_SEL_B16 = 8, + OPC_SEL_B32 = 9, + OPC_SEL_S16 = 10, + OPC_SEL_S32 = 11, + OPC_SEL_F16 = 12, + OPC_SEL_F32 = 13, + OPC_SAD_S16 = 14, + OPC_SAD_S32 = 15, + + /* category 4: */ + OPC_RCP = 0, + OPC_RSQ = 1, + OPC_LOG2 = 2, + OPC_EXP2 = 3, + OPC_SIN = 4, + OPC_COS = 5, + OPC_SQRT = 6, + // 7-63 - invalid + + /* category 5: */ + OPC_ISAM = 0, + OPC_ISAML = 1, + OPC_ISAMM = 2, + OPC_SAM = 3, + OPC_SAMB = 4, + OPC_SAML = 5, + OPC_SAMGQ = 6, + OPC_GETLOD = 7, + OPC_CONV = 8, + OPC_CONVM = 9, + OPC_GETSIZE = 10, + OPC_GETBUF = 11, + OPC_GETPOS = 12, + OPC_GETINFO = 13, + OPC_DSX = 14, + OPC_DSY = 15, + OPC_GATHER4R = 16, + OPC_GATHER4G = 17, + OPC_GATHER4B = 18, + OPC_GATHER4A = 19, + OPC_SAMGP0 = 20, + OPC_SAMGP1 = 21, + OPC_SAMGP2 = 22, + OPC_SAMGP3 = 23, + OPC_DSXPP_1 = 24, + OPC_DSYPP_1 = 25, + OPC_RGETPOS = 26, + OPC_RGETINFO = 27, + + /* category 6: */ + OPC_LDG = 0, /* load-global */ + OPC_LDL = 1, + OPC_LDP = 2, + OPC_STG = 3, /* store-global */ + OPC_STL = 4, + OPC_STP = 5, + OPC_STI = 6, + OPC_G2L = 7, + OPC_L2G = 8, + OPC_PREFETCH = 9, + OPC_LDLW = 10, + OPC_STLW = 11, + OPC_RESFMT = 14, + OPC_RESINFO = 15, + OPC_ATOMIC_ADD_L = 16, + OPC_ATOMIC_SUB_L = 17, + OPC_ATOMIC_XCHG_L = 18, + OPC_ATOMIC_INC_L = 19, + OPC_ATOMIC_DEC_L = 20, + OPC_ATOMIC_CMPXCHG_L = 21, + OPC_ATOMIC_MIN_L = 22, + OPC_ATOMIC_MAX_L = 23, + OPC_ATOMIC_AND_L = 24, + OPC_ATOMIC_OR_L = 25, + OPC_ATOMIC_XOR_L = 26, + OPC_LDGB_TYPED_4D = 27, + OPC_STGB_4D_4 = 28, + OPC_STIB = 29, + OPC_LDC_4 = 30, + OPC_LDLV = 31, + +} opc_t; + +typedef enum { + TYPE_F16 = 0, + TYPE_F32 = 1, + TYPE_U16 = 2, + TYPE_U32 = 3, + TYPE_S16 = 4, + TYPE_S32 = 5, + TYPE_U8 = 6, + TYPE_S8 = 7, // XXX I assume? +} type_t; + +static inline uint32_t type_size(type_t type) +{ + switch (type) { + case TYPE_F32: + case TYPE_U32: + case TYPE_S32: + return 32; + case TYPE_F16: + case TYPE_U16: + case TYPE_S16: + return 16; + case TYPE_U8: + case TYPE_S8: + return 8; + default: + assert(0); /* invalid type */ + return 0; + } +} + +static inline int type_float(type_t type) +{ + return (type == TYPE_F32) || (type == TYPE_F16); +} + +typedef union PACKED { + /* normal gpr or const src register: */ + struct PACKED { + uint32_t comp : 2; + uint32_t num : 9; + }; + /* for immediate val: */ + int32_t iim_val : 11; + /* to make compiler happy: */ + uint32_t dummy32; + uint32_t dummy11 : 11; + uint32_t dummy8 : 8; +} reg_t; + +/* special registers: */ +#define REG_A0 61 /* address register */ +#define REG_P0 62 /* predicate register */ + +static inline int reg_special(reg_t reg) +{ + return (reg.num == REG_A0) || (reg.num == REG_P0); +} + +typedef struct PACKED { + /* dword0: */ + int16_t immed : 16; + uint32_t dummy1 : 16; + + /* dword1: */ + uint32_t dummy2 : 8; + uint32_t repeat : 3; + uint32_t dummy3 : 1; + uint32_t ss : 1; + uint32_t dummy4 : 7; + uint32_t inv : 1; + uint32_t comp : 2; + uint32_t opc : 4; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat0_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + /* for normal src register: */ + struct PACKED { + uint32_t src : 11; + uint32_t pad : 21; + }; + /* for address relative: */ + struct PACKED { + int32_t off : 10; + uint32_t must_be_3 : 2; + uint32_t unknown : 20; + }; + /* for immediate: */ + int32_t iim_val; + float fim_val; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 3; + uint32_t src_r : 1; + uint32_t ss : 1; + uint32_t src_rel : 1; + uint32_t dst_type : 3; + uint32_t dst_rel : 1; + uint32_t src_type : 3; + uint32_t src_c : 1; + uint32_t src_im : 1; + uint32_t even : 1; + uint32_t pos_inf : 1; + uint32_t must_be_0 : 2; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat1_t; + +typedef struct PACKED { + /* dword0: */ + uint32_t src1 : 11; + uint32_t src1_rel : 1; /* relative address */ + uint32_t src1_c : 1; /* const */ + uint32_t src1_im : 1; /* immediate */ + uint32_t src1_neg : 1; /* negate */ + uint32_t src1_abs : 1; /* absolute value */ + + uint32_t src2 : 11; + uint32_t src2_rel : 1; /* relative address */ + uint32_t src2_c : 1; /* const */ + uint32_t src2_im : 1; /* immediate */ + uint32_t src2_neg : 1; /* negate */ + uint32_t src2_abs : 1; /* absolute value */ + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 3; + uint32_t src1_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; /* dunno */ + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t ei : 1; + uint32_t cond : 3; + uint32_t src2_r : 1; + uint32_t full : 1; /* not half */ + uint32_t opc : 6; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat2_t; + +typedef struct PACKED { + /* dword0: */ + uint32_t src1 : 11; + uint32_t src1_rel : 1; + uint32_t src1_c : 1; + uint32_t src2_c : 1; + uint32_t src1_neg : 1; + uint32_t src2_r : 1; + uint32_t src3 : 11; + uint32_t src3_rel : 1; + uint32_t src3_c : 1; + uint32_t src3_r : 1; + uint32_t src2_neg : 1; + uint32_t src3_neg : 1; + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 3; + uint32_t src1_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t src2 : 8; + uint32_t opc : 4; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat3_t; + +typedef struct PACKED { + /* dword0: */ + uint32_t src : 11; + uint32_t src_rel : 1; + uint32_t src_c : 1; + uint32_t src_im : 1; + uint32_t src_neg : 1; + uint32_t src_abs : 1; + uint32_t dummy1 : 16; /* seem to be ignored */ + + /* dword1: */ + uint32_t dst : 8; + uint32_t repeat : 3; + uint32_t src_r : 1; + uint32_t ss : 1; + uint32_t ul : 1; + uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */ + uint32_t dummy2 : 5; /* seem to be ignored */ + uint32_t full : 1; /* not half */ + uint32_t opc : 6; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat4_t; + +typedef struct PACKED { + /* dword0: */ + union PACKED { + /* normal case: */ + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 8; + uint32_t dummy1 : 4; /* seem to be ignored */ + uint32_t samp : 4; + uint32_t tex : 7; + } norm; + /* s2en case: */ + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t src2 : 11; + uint32_t dummy1 : 1; + uint32_t src3 : 8; + uint32_t dummy2 : 3; + } s2en; + /* same in either case: */ + // XXX I think, confirm this + struct PACKED { + uint32_t full : 1; /* not half */ + uint32_t src1 : 8; + uint32_t pad : 23; + }; + }; + + /* dword1: */ + uint32_t dst : 8; + uint32_t wrmask : 4; /* write-mask */ + uint32_t type : 3; + uint32_t dummy2 : 1; /* seems to be ignored */ + uint32_t is_3d : 1; + + uint32_t is_a : 1; + uint32_t is_s : 1; + uint32_t is_s2en : 1; + uint32_t is_o : 1; + uint32_t is_p : 1; + + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat5_t; + +/* used for load instructions: */ +typedef struct PACKED { + /* dword0: */ + uint32_t must_be_one1 : 1; + int16_t off : 13; + uint32_t src : 8; + uint32_t dummy1 : 1; + uint32_t must_be_one2 : 1; + int32_t iim_val : 8; + + /* dword1: */ + uint32_t dst : 8; + uint32_t dummy2 : 9; + uint32_t type : 3; + uint32_t dummy3 : 2; + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat6a_t; + +/* used for store instructions: */ +typedef struct PACKED { + /* dword0: */ + uint32_t must_be_zero1 : 1; + uint32_t src : 8; + uint32_t off_hi : 5; /* high bits of 'off'... ugly! */ + uint32_t dummy1 : 9; + uint32_t must_be_one1 : 1; + int32_t iim_val : 8; + + /* dword1: */ + uint16_t off : 8; + uint32_t must_be_one2 : 1; + uint32_t dst : 8; + uint32_t type : 3; + uint32_t dummy2 : 2; + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; +} instr_cat6b_t; + +typedef union PACKED { + instr_cat6a_t a; + instr_cat6b_t b; + struct PACKED { + /* dword0: */ + uint32_t pad1 : 24; + int32_t iim_val : 8; + + /* dword1: */ + uint32_t pad2 : 17; + uint32_t type : 3; + uint32_t pad3 : 2; + uint32_t opc : 5; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; + }; +} instr_cat6_t; + +typedef union PACKED { + instr_cat0_t cat0; + instr_cat1_t cat1; + instr_cat2_t cat2; + instr_cat3_t cat3; + instr_cat4_t cat4; + instr_cat5_t cat5; + instr_cat6_t cat6; + struct PACKED { + /* dword0: */ + uint64_t pad1 : 40; + uint32_t repeat : 3; /* cat0-cat4 */ + uint32_t pad2 : 1; + uint32_t ss : 1; /* cat1-cat4 (cat0??) */ + uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */ + uint32_t pad3 : 13; + uint32_t jmp_tgt : 1; + uint32_t sync : 1; + uint32_t opc_cat : 3; + + }; +} instr_t; + +#endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c new file mode 100644 index 00000000000..76e8b113ac1 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c @@ -0,0 +1,527 @@ +/* + * Copyright (c) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ir-a3xx.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <stdbool.h> +#include <errno.h> + +#include "freedreno_util.h" +#include "instr-a3xx.h" + +/* simple allocator to carve allocations out of an up-front allocated heap, + * so that we can free everything easily in one shot. + */ +static void * ir3_alloc(struct ir3_shader *shader, int sz) +{ + void *ptr = &shader->heap[shader->heap_idx]; + shader->heap_idx += align(sz, 4); + return ptr; +} + +struct ir3_shader * ir3_shader_create(void) +{ + return calloc(1, sizeof(struct ir3_shader)); +} + +void ir3_shader_destroy(struct ir3_shader *shader) +{ + free(shader); +} + +#define iassert(cond) do { \ + if (!(cond)) { \ + assert(cond); \ + return -1; \ + } } while (0) + +static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info, + uint32_t repeat, uint32_t valid_flags) +{ + reg_t val = { .dummy32 = 0 }; + + assert(!(reg->flags & ~valid_flags)); + + if (!(reg->flags & IR3_REG_R)) + repeat = 0; + + if (reg->flags & IR3_REG_IMMED) { + val.iim_val = reg->iim_val; + } else { + int8_t max = (reg->num + repeat) >> 2; + + val.comp = reg->num & 0x3; + val.num = reg->num >> 2; + + if (reg->flags & IR3_REG_CONST) { + info->max_const = MAX2(info->max_const, max); + } else if ((max != REG_A0) && (max != REG_P0)) { + if (reg->flags & IR3_REG_HALF) { + info->max_half_reg = MAX2(info->max_half_reg, max); + } else { + info->max_reg = MAX2(info->max_reg, max); + } + } + } + + return val.dummy32; +} + +static int emit_cat0(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + instr_cat0_t *cat0 = ptr; + + cat0->immed = instr->cat0.immed; + cat0->repeat = instr->repeat; + cat0->ss = !!(instr->flags & IR3_INSTR_SS); + cat0->inv = instr->cat0.inv; + cat0->comp = instr->cat0.comp; + cat0->opc = instr->opc; + cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat0->sync = !!(instr->flags & IR3_INSTR_SY); + cat0->opc_cat = 0; + + return 0; +} + +static uint32_t type_flags(type_t type) +{ + return (type_size(type) == 32) ? 0 : IR3_REG_HALF; +} + +static int emit_cat1(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat1_t *cat1 = ptr; + + iassert(instr->regs_count == 2); + iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); + iassert((src->flags & IR3_REG_IMMED) || + !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); + + if (src->flags & IR3_REG_IMMED) { + cat1->iim_val = src->iim_val; + cat1->src_im = 1; + } else if (src->flags & IR3_REG_RELATIV) { + cat1->off = src->offset; + cat1->src_rel = 1; + cat1->must_be_3 = 3; + } else { + cat1->src = reg(src, info, instr->repeat, + IR3_REG_IMMED | IR3_REG_RELATIV | + IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF); + } + + cat1->dst = reg(dst, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_EVEN | + IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); + cat1->repeat = instr->repeat; + cat1->src_r = !!(src->flags & IR3_REG_R); + cat1->ss = !!(instr->flags & IR3_INSTR_SS); + cat1->dst_type = instr->cat1.dst_type; + cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); + cat1->src_type = instr->cat1.src_type; + cat1->src_c = !!(src->flags & IR3_REG_CONST); + cat1->even = !!(dst->flags & IR3_REG_EVEN); + cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); + cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat1->sync = !!(instr->flags & IR3_INSTR_SY); + cat1->opc_cat = 1; + + return 0; +} + +static int emit_cat2(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + instr_cat2_t *cat2 = ptr; + + iassert((instr->regs_count == 2) || (instr->regs_count == 3)); + + cat2->src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); + cat2->src1_rel = !!(src1->flags & IR3_REG_RELATIV); + cat2->src1_c = !!(src1->flags & IR3_REG_CONST); + cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); + cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE); + cat2->src1_abs = !!(src1->flags & IR3_REG_ABS); + cat2->src1_r = !!(src1->flags & IR3_REG_R); + + if (src2) { + iassert((src2->flags & IR3_REG_IMMED) || + !((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat2->src2 = reg(src2, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF); + cat2->src2_rel = !!(src2->flags & IR3_REG_RELATIV); + cat2->src2_c = !!(src2->flags & IR3_REG_CONST); + cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); + cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE); + cat2->src2_abs = !!(src2->flags & IR3_REG_ABS); + cat2->src2_r = !!(src2->flags & IR3_REG_R); + } + + cat2->dst = reg(dst, info, instr->repeat, + IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); + cat2->repeat = instr->repeat; + cat2->ss = !!(instr->flags & IR3_INSTR_SS); + cat2->ul = !!(instr->flags & IR3_INSTR_UL); + cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); + cat2->ei = !!(dst->flags & IR3_REG_EI); + cat2->cond = instr->cat2.condition; + cat2->full = ! (src1->flags & IR3_REG_HALF); + cat2->opc = instr->opc; + cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat2->sync = !!(instr->flags & IR3_INSTR_SY); + cat2->opc_cat = 2; + + return 0; +} + +static int emit_cat3(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + instr_cat3_t *cat3 = ptr; + uint32_t src_flags = 0; + + switch (instr->opc) { + case OPC_MAD_F16: + case OPC_MAD_U16: + case OPC_MAD_S16: + case OPC_SEL_B16: + case OPC_SEL_S16: + case OPC_SEL_F16: + case OPC_SAD_S16: + case OPC_SAD_S32: // really?? + src_flags |= IR3_REG_HALF; + break; + default: + break; + } + + iassert(instr->regs_count == 4); + iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); + iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); + + cat3->src1 = reg(src1, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | + IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); + cat3->src1_rel = !!(src1->flags & IR3_REG_RELATIV); + cat3->src1_c = !!(src1->flags & IR3_REG_CONST); + cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE); + cat3->src1_r = !!(src1->flags & IR3_REG_R); + + cat3->src2 = reg(src2, info, instr->repeat, + IR3_REG_CONST | IR3_REG_NEGATE | + IR3_REG_R | IR3_REG_HALF); + cat3->src2_c = !!(src2->flags & IR3_REG_CONST); + cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE); + cat3->src2_r = !!(src2->flags & IR3_REG_R); + + cat3->src3 = reg(src3, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | + IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF); + cat3->src3_rel = !!(src3->flags & IR3_REG_RELATIV); + cat3->src3_c = !!(src3->flags & IR3_REG_CONST); + cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE); + cat3->src3_r = !!(src3->flags & IR3_REG_R); + + cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat3->repeat = instr->repeat; + cat3->ss = !!(instr->flags & IR3_INSTR_SS); + cat3->ul = !!(instr->flags & IR3_INSTR_UL); + cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); + cat3->opc = instr->opc; + cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat3->sync = !!(instr->flags & IR3_INSTR_SY); + cat3->opc_cat = 3; + + return 0; +} + +static int emit_cat4(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat4_t *cat4 = ptr; + + iassert(instr->regs_count == 2); + + cat4->src = reg(src, info, instr->repeat, + IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R | + IR3_REG_HALF); + cat4->src_rel = !!(src->flags & IR3_REG_RELATIV); + cat4->src_c = !!(src->flags & IR3_REG_CONST); + cat4->src_im = !!(src->flags & IR3_REG_IMMED); + cat4->src_neg = !!(src->flags & IR3_REG_NEGATE); + cat4->src_abs = !!(src->flags & IR3_REG_ABS); + cat4->src_r = !!(src->flags & IR3_REG_R); + + cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat4->repeat = instr->repeat; + cat4->ss = !!(instr->flags & IR3_INSTR_SS); + cat4->ul = !!(instr->flags & IR3_INSTR_UL); + cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); + cat4->full = ! (src->flags & IR3_REG_HALF); + cat4->opc = instr->opc; + cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat4->sync = !!(instr->flags & IR3_INSTR_SY); + cat4->opc_cat = 4; + + return 0; +} + +static int emit_cat5(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src1 = instr->regs[1]; + struct ir3_register *src2 = instr->regs[2]; + struct ir3_register *src3 = instr->regs[3]; + instr_cat5_t *cat5 = ptr; + + iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); + + if (src1) { + cat5->full = ! (src1->flags & IR3_REG_HALF); + cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); + } + + + if (instr->flags & IR3_INSTR_S2EN) { + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + if (src3) { + iassert(src3->flags & IR3_REG_HALF); + cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); + } + iassert(!(instr->cat5.samp | instr->cat5.tex)); + } else { + iassert(!src3); + if (src2) { + iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); + cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); + } + cat5->norm.samp = instr->cat5.samp; + cat5->norm.tex = instr->cat5.tex; + } + + cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + cat5->wrmask = dst->wrmask; + cat5->type = instr->cat5.type; + cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); + cat5->is_a = !!(instr->flags & IR3_INSTR_A); + cat5->is_s = !!(instr->flags & IR3_INSTR_S); + cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); + cat5->is_o = !!(instr->flags & IR3_INSTR_O); + cat5->is_p = !!(instr->flags & IR3_INSTR_P); + cat5->opc = instr->opc; + cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat5->sync = !!(instr->flags & IR3_INSTR_SY); + cat5->opc_cat = 5; + + return 0; +} + +static int emit_cat6(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) +{ + struct ir3_register *dst = instr->regs[0]; + struct ir3_register *src = instr->regs[1]; + instr_cat6_t *cat6 = ptr; + + iassert(instr->regs_count == 2); + + switch (instr->opc) { + /* load instructions: */ + case OPC_LDG: + case OPC_LDP: + case OPC_LDL: + case OPC_LDLW: + case OPC_LDLV: + case OPC_PREFETCH: { + instr_cat6a_t *cat6a = ptr; + + iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF)); + + cat6a->must_be_one1 = 1; + cat6a->must_be_one2 = 1; + cat6a->off = instr->cat6.offset; + cat6a->src = reg(src, info, instr->repeat, 0); + cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); + break; + } + /* store instructions: */ + case OPC_STG: + case OPC_STP: + case OPC_STL: + case OPC_STLW: + case OPC_STI: { + instr_cat6b_t *cat6b = ptr; + uint32_t src_flags = type_flags(instr->cat6.type); + uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0; + + iassert(!((src->flags ^ src_flags) & IR3_REG_HALF)); + + cat6b->must_be_one1 = 1; + cat6b->must_be_one2 = 1; + cat6b->src = reg(src, info, instr->repeat, src_flags); + cat6b->off_hi = instr->cat6.offset >> 8; + cat6b->off = instr->cat6.offset; + cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags); + + break; + } + default: + // TODO + break; + } + + cat6->iim_val = instr->cat6.iim_val; + cat6->type = instr->cat6.type; + cat6->opc = instr->opc; + cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); + cat6->sync = !!(instr->flags & IR3_INSTR_SY); + cat6->opc_cat = 6; + + return 0; +} + +static int (*emit[])(struct ir3_instruction *instr, void *ptr, + struct ir3_shader_info *info) = { + emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, +}; + +void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info) +{ + uint32_t *ptr, *dwords; + uint32_t i; + + info->max_reg = -1; + info->max_half_reg = -1; + info->max_const = -1; + + /* need a integer number of instruction "groups" (sets of four + * instructions), so pad out w/ NOPs if needed: + */ + while (shader->instrs_count != align(shader->instrs_count, 4)) + ir3_instr_create(shader, 0, OPC_NOP); + + /* each instruction is 64bits: */ + info->sizedwords = 2 * shader->instrs_count; + + ptr = dwords = calloc(1, 4 * info->sizedwords); + + for (i = 0; i < shader->instrs_count; i++) { + struct ir3_instruction *instr = shader->instrs[i]; + int ret = emit[instr->category](instr, dwords, info); + if (ret) + goto fail; + dwords += 2; + } + + return ptr; + +fail: + free(ptr); + return NULL; +} + +static struct ir3_register * reg_create(struct ir3_shader *shader, + int num, int flags) +{ + struct ir3_register *reg = + ir3_alloc(shader, sizeof(struct ir3_register)); + reg->flags = flags; + reg->num = num; + return reg; +} + +static void insert_instr(struct ir3_shader *shader, + struct ir3_instruction *instr) +{ + assert(shader->instrs_count < ARRAY_SIZE(shader->instrs)); + shader->instrs[shader->instrs_count++] = instr; +} + +struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, + int category, opc_t opc) +{ + struct ir3_instruction *instr = + ir3_alloc(shader, sizeof(struct ir3_instruction)); + instr->shader = shader; + instr->category = category; + instr->opc = opc; + insert_instr(shader, instr); + return instr; +} + +struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) +{ + struct ir3_instruction *new_instr = + ir3_alloc(instr->shader, sizeof(struct ir3_instruction)); + unsigned i; + + *new_instr = *instr; + insert_instr(instr->shader, new_instr); + + /* clone registers: */ + new_instr->regs_count = 0; + for (i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + struct ir3_register *new_reg = + ir3_reg_create(new_instr, reg->num, reg->flags); + *new_reg = *reg; + } + + return new_instr; +} + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags) +{ + struct ir3_register *reg = reg_create(instr->shader, num, flags); + assert(instr->regs_count < ARRAY_SIZE(instr->regs)); + instr->regs[instr->regs_count++] = reg; + return reg; +} diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h new file mode 100644 index 00000000000..2fedc7bee38 --- /dev/null +++ b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2013 Rob Clark <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef IR3_H_ +#define IR3_H_ + +#include <stdint.h> +#include <stdbool.h> + +#include "instr-a3xx.h" + +/* low level intermediate representation of an adreno shader program */ + +struct ir3_shader; + +struct ir3_shader * fd_asm_parse(const char *src); + +struct ir3_shader_info { + uint16_t sizedwords; + /* NOTE: max_reg, etc, does not include registers not touched + * by the shader (ie. vertex fetched via VFD_DECODE but not + * touched by shader) + */ + int8_t max_reg; /* highest GPR # used by shader */ + int8_t max_half_reg; + int8_t max_const; +}; + +struct ir3_register { + enum { + IR3_REG_CONST = 0x001, + IR3_REG_IMMED = 0x002, + IR3_REG_HALF = 0x004, + IR3_REG_RELATIV= 0x008, + IR3_REG_R = 0x010, + IR3_REG_NEGATE = 0x020, + IR3_REG_ABS = 0x040, + IR3_REG_EVEN = 0x080, + IR3_REG_POS_INF= 0x100, + /* (ei) flag, end-input? Set on last bary, presumably to signal + * that the shader needs no more input: + */ + IR3_REG_EI = 0x200, + } flags; + union { + /* normal registers: */ + struct { + /* the component is in the low two bits of the reg #, so + * rN.x becomes: (n << 2) | x + */ + int num; + int wrmask; + }; + /* immediate: */ + int iim_val; + float fim_val; + /* relative: */ + int offset; + }; +}; + +struct ir3_instruction { + struct ir3_shader *shader; + int category; + opc_t opc; + enum { + /* (sy) flag is set on first instruction, and after sample + * instructions (probably just on RAW hazard). + */ + IR3_INSTR_SY = 0x001, + /* (ss) flag is set on first instruction, and first instruction + * to depend on the result of "long" instructions (RAW hazard): + * + * rcp, rsq, log2, exp2, sin, cos, sqrt + * + * It seems to synchronize until all in-flight instructions are + * completed, for example: + * + * rsq hr1.w, hr1.w + * add.f hr2.z, (neg)hr2.z, hc0.y + * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y + * rsq hr2.x, hr2.x + * (rpt1)nop + * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w + * nop + * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w + * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w + * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x + * + * The last mul.f does not have (ss) set, presumably because the + * (ss) on the previous instruction does the job. + * + * The blob driver also seems to set it on WAR hazards, although + * not really clear if this is needed or just blob compiler being + * sloppy. So far I haven't found a case where removing the (ss) + * causes problems for WAR hazard, but I could just be getting + * lucky: + * + * rcp r1.y, r3.y + * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z + * + */ + IR3_INSTR_SS = 0x002, + /* (jp) flag is set on jump targets: + */ + IR3_INSTR_JP = 0x004, + IR3_INSTR_UL = 0x008, + IR3_INSTR_3D = 0x010, + IR3_INSTR_A = 0x020, + IR3_INSTR_O = 0x040, + IR3_INSTR_P = 0x080, + IR3_INSTR_S = 0x100, + IR3_INSTR_S2EN = 0x200, + } flags; + int repeat; + unsigned regs_count; + struct ir3_register *regs[4]; + union { + struct { + char inv; + char comp; + int immed; + } cat0; + struct { + type_t src_type, dst_type; + } cat1; + struct { + enum { + IR3_COND_LT = 0, + IR3_COND_LE = 1, + IR3_COND_GT = 2, + IR3_COND_GE = 3, + IR3_COND_EQ = 4, + IR3_COND_NE = 5, + } condition; + } cat2; + struct { + unsigned samp, tex; + type_t type; + } cat5; + struct { + type_t type; + int offset; + int iim_val; + } cat6; + }; +}; + +/* this is just large to cope w/ the large test *.asm: */ +#define MAX_INSTRS 10240 + +struct ir3_shader { + unsigned instrs_count; + struct ir3_instruction *instrs[MAX_INSTRS]; + uint32_t heap[128 * MAX_INSTRS]; + unsigned heap_idx; +}; + +struct ir3_shader * ir3_shader_create(void); +void ir3_shader_destroy(struct ir3_shader *shader); +void * ir3_shader_assemble(struct ir3_shader *shader, + struct ir3_shader_info *info); + +struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, int category, opc_t opc); +struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); + +struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, + int num, int flags); + +#endif /* IR3_H_ */ diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h index 42057da0737..b1198125e97 100644 --- a/src/gallium/drivers/freedreno/adreno_common.xml.h +++ b/src/gallium/drivers/freedreno/adreno_common.xml.h @@ -8,7 +8,7 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 38794 bytes, from 2013-05-05 22:47:28) +- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h index 853206d3757..d3a7baca0e9 100644 --- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h +++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h @@ -8,7 +8,7 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng git clone git://0x04.net/rules-ng-ng The rules-ng-ng source files this header was generated from are: -- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 38794 bytes, from 2013-05-05 22:47:28) +- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml ( 42578 bytes, from 2013-06-02 13:10:46) - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27) - /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml ( 3094 bytes, from 2013-05-05 18:29:22) - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml ( 9712 bytes, from 2013-05-26 15:22:37) diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index 9c47a58a81f..f88fa08aa7f 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -50,6 +50,7 @@ #include "freedreno_util.h" #include "fd2_screen.h" +#include "fd3_screen.h" /* XXX this should go away */ #include "state_tracker/drm_driver.h" @@ -412,6 +413,9 @@ fd_screen_create(struct fd_device *dev) case 220: fd2_screen_init(pscreen); break; + case 320: + fd3_screen_init(pscreen); + break; default: debug_printf("unsupported GPU: a%03d\n", screen->gpu_id); goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c index 83a33db8f5b..0462e5fb515 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.c +++ b/src/gallium/drivers/freedreno/freedreno_util.c @@ -39,6 +39,8 @@ fd_pipe2depth(enum pipe_format format) return DEPTHX_16; case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: return DEPTHX_24_8; default: return ~0; |