diff options
author | Jerome Glisse <[email protected]> | 2010-09-17 10:41:50 -0400 |
---|---|---|
committer | Jerome Glisse <[email protected]> | 2010-09-17 10:49:05 -0400 |
commit | fd266ec62ca772a8551d2d7922d718d9d84bdf07 (patch) | |
tree | 1f664adfe2d523149885a4c19b0a240841053454 /src/gallium/winsys/r600 | |
parent | d80bed15660e784a405c467be1ded8266d8ffc48 (diff) |
r600g: alternative command stream building from context
The winsys context builds a list of register blocks. A register block is
a set of consecutive registers that will be emitted together in the
same pm4 packet (the various r600_block* are there to provide basic
grouping that tries to take advantage of states that are linked together).
Some consecutive registers are each emitted in a different block,
for instance the various cb[0-7]_base. At winsys context creation,
the list of blocks is created, along with an index into that list. So
to find which block a register is in, you simply use the register
offset to look up the block index. Blocks are grouped together into
groups, which are the various pkt3 groups of config, context, resource, etc.
Each pipe state builds a list of the registers it wants to modify.
Besides the register value, it also gives a register mask so that only a subpart
of a register is updated by a given pipe state (the ORing is done
in the winsys). There is no prebuilt register list or define for
each pipe state. Once pipe states are built, they are bound to
the winsys context.
Each of the r600_context_pipe_state_set* functions will go through the list
of registers, find which block each register falls into, and update the
value of the block with proper masking (the vs/ps resource/constant
functions are specialized variants with somewhat limited capabilities).
Each block modified by r600_context_pipe_state_set* is marked as
dirty, and we update a count of the dwords needed to emit all dirty
state so far.
r600_context_pipe_state_set* should be called only when the pipe context
changes some of the state (thus when pipe binds state or sets state).
Then, to draw primitives, you make a call to r600_context_draw:
void r600_context_draw(struct r600_context *ctx, struct r600_draw *draw)
It will check whether there are enough dwords in the current cs buffer and,
if not, will flush. Once there is enough room, it will copy the packets
from the dirty blocks and then add the draw packet3 to initiate the draw.
The flush will send the current cs, reset the count of dwords to
0, re-mark all enabled states as dirty, and recompute
the number of dwords needed to send the current context.
Signed-off-by: Jerome Glisse <[email protected]>
Diffstat (limited to 'src/gallium/winsys/r600')
-rw-r--r-- | src/gallium/winsys/r600/drm/Makefile | 2 | ||||
-rw-r--r-- | src/gallium/winsys/r600/drm/r600.c | 129 | ||||
-rw-r--r-- | src/gallium/winsys/r600/drm/r600_priv.h | 56 | ||||
-rw-r--r-- | src/gallium/winsys/r600/drm/r600_state2.c | 1055 | ||||
-rw-r--r-- | src/gallium/winsys/r600/drm/r600d.h | 77 |
5 files changed, 1310 insertions, 9 deletions
diff --git a/src/gallium/winsys/r600/drm/Makefile b/src/gallium/winsys/r600/drm/Makefile index c81a075f1e8..9d8dc8dc594 100644 --- a/src/gallium/winsys/r600/drm/Makefile +++ b/src/gallium/winsys/r600/drm/Makefile @@ -7,6 +7,8 @@ LIBNAME = r600winsys C_SOURCES = \ bof.c \ r600_state.c \ + r600_state2.c \ + r600.c \ radeon_ctx.c \ radeon_draw.c \ radeon_state.c \ diff --git a/src/gallium/winsys/r600/drm/r600.c b/src/gallium/winsys/r600/drm/r600.c new file mode 100644 index 00000000000..af9b9187ab1 --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600.c @@ -0,0 +1,129 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jerome Glisse + */ +#include "xf86drm.h" +#include "radeon_drm.h" +#include "r600_priv.h" + +enum radeon_family r600_get_family(struct radeon *r600) +{ + return r600->family; +} + +static int r600_get_device(struct radeon *r600) +{ + struct drm_radeon_info info; + + r600->device = 0; + info.request = RADEON_INFO_DEVICE_ID; + info.value = (uintptr_t)&r600->device; + return drmCommandWriteRead(r600->fd, DRM_RADEON_INFO, &info, sizeof(struct drm_radeon_info)); +} + +struct radeon *r600_new(int fd, unsigned device) +{ + struct radeon *r600; + int r; + + r600 = calloc(1, sizeof(*r600)); + if (r600 == NULL) { + return NULL; + } + r600->fd = fd; + r600->device = device; + if (fd >= 0) { + r = r600_get_device(r600); + if (r) { + R600_ERR("Failed to get device id\n"); + r600_delete(r600); + return NULL; + } + } + r600->family = radeon_family_from_device(r600->device); + if (r600->family == CHIP_UNKNOWN) { + R600_ERR("Unknown chipset 0x%04X\n", r600->device); + r600_delete(r600); + return NULL; + } + switch (r600->family) { + case CHIP_R600: + case CHIP_RV610: + case CHIP_RV630: + case CHIP_RV670: + case CHIP_RV620: + case CHIP_RV635: + case CHIP_RS780: + case CHIP_RS880: + case CHIP_RV770: + case CHIP_RV730: + case CHIP_RV710: + case CHIP_RV740: + break; + case CHIP_R100: + case CHIP_RV100: + case CHIP_RS100: + case CHIP_RV200: + case CHIP_RS200: + case CHIP_R200: + case CHIP_RV250: + case CHIP_RS300: + case CHIP_RV280: + case CHIP_R300: + case CHIP_R350: + case CHIP_RV350: + case CHIP_RV380: + case CHIP_R420: + case CHIP_R423: + case CHIP_RV410: + case CHIP_RS400: + case CHIP_RS480: + case CHIP_RS600: + case CHIP_RS690: + case CHIP_RS740: + case CHIP_RV515: + case CHIP_R520: + case CHIP_RV530: + case CHIP_RV560: + case CHIP_RV570: + case CHIP_R580: + case CHIP_CEDAR: + case CHIP_REDWOOD: + case CHIP_JUNIPER: + case CHIP_CYPRESS: + case CHIP_HEMLOCK: + default: + R600_ERR("unknown or unsupported chipset 0x%04X\n", r600->device); + break; + } + return 
r600; +} + +void r600_delete(struct radeon *r600) +{ + if (r600 == NULL) + return; + drmClose(r600->fd); + free(r600); +} diff --git a/src/gallium/winsys/r600/drm/r600_priv.h b/src/gallium/winsys/r600/drm/r600_priv.h new file mode 100644 index 00000000000..7a9025ad3c2 --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600_priv.h @@ -0,0 +1,56 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jerome Glisse + */ +#ifndef R600_PRIV_H +#define R600_PRIV_H + +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <assert.h> +#include "r600.h" + + +struct radeon { + int fd; + int refcount; + unsigned device; + unsigned family; +}; + +struct radeon *r600_new(int fd, unsigned device); +void r600_delete(struct radeon *r600); + +struct r600_reg { + unsigned need_bo; + unsigned flush_flags; + unsigned offset; +}; + + +/* radeon_pciid.c */ +unsigned radeon_family_from_device(unsigned device); + +#endif diff --git a/src/gallium/winsys/r600/drm/r600_state2.c b/src/gallium/winsys/r600/drm/r600_state2.c new file mode 100644 index 00000000000..f6fba0a8dc5 --- /dev/null +++ b/src/gallium/winsys/r600/drm/r600_state2.c @@ -0,0 +1,1055 @@ +/* + * Copyright 2010 Jerome Glisse <[email protected]> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Jerome Glisse + */ +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> +#include "r600.h" +#include "r600d.h" +#include "r600_priv.h" +#include "radeon_drm.h" +#include "bof.h" +#include "pipe/p_compiler.h" +#include "util/u_inlines.h" +#include <pipebuffer/pb_bufmgr.h> + +struct radeon_ws_bo { + struct pipe_reference reference; + struct pb_buffer *pb; +}; + +struct radeon_bo { + struct pipe_reference reference; + unsigned handle; + unsigned size; + unsigned alignment; + unsigned map_count; + void *data; +}; +struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf); +int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo); +void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo); + +unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo); + +static int r600_group_id_register_offset(unsigned offset) +{ + if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) { + return R600_GROUP_CONFIG; + } + if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) { + return R600_GROUP_CONTEXT; + } + if (offset >= R600_ALU_CONST_OFFSET && offset < R600_ALU_CONST_END) { + return R600_GROUP_ALU_CONST; + } + if (offset >= R600_RESOURCE_OFFSET && offset < R600_RESOURCE_END) { + return R600_GROUP_RESOURCE; + } + if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) { + return R600_GROUP_SAMPLER; + } + if (offset >= R600_CTL_CONST_OFFSET && offset < R600_CTL_CONST_END) { + return R600_GROUP_CTL_CONST; + } + if (offset >= R600_LOOP_CONST_OFFSET && offset < R600_LOOP_CONST_END) { + return R600_GROUP_LOOP_CONST; + } + if (offset >= R600_BOOL_CONST_OFFSET && offset < R600_BOOL_CONST_END) { + return R600_GROUP_BOOL_CONST; + } + return -1; +} + +static int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg) +{ + struct r600_group_block *block, *tmp; + struct r600_group *group; + int group_id, id; + + for (unsigned i 
= 0, n = 0; i < nreg; i += n) { + u32 j, r; + /* find number of consecutive registers */ + for (j = i + 1, r = reg[i].offset + 4, n = 1; j < (nreg - i); j++, n++, r+=4) { + if (r != reg[j].offset) { + break; + } + } + + /* find into which group this block is */ + group_id = r600_group_id_register_offset(reg[i].offset); + assert(group_id >= 0); + group = &ctx->groups[group_id]; + + /* allocate new block */ + tmp = realloc(group->blocks, (group->nblocks + 1) * sizeof(struct r600_group_block)); + if (tmp == NULL) { + return -ENOMEM; + } + group->blocks = tmp; + block = &group->blocks[group->nblocks++]; + for (int j = 0; j < n; j++) { + group->offset_block_id[((reg[i].offset - group->start_offset) >> 2) + j] = group->nblocks - 1; + } + + /* initialize block */ + memset(block, 0, sizeof(struct r600_group_block)); + block->start_offset = reg[i].offset; + block->pm4_ndwords = n; + block->nreg = n; + for (j = 0; j < n; j++) { + if (reg[i+j].need_bo) { + block->nbo++; + assert(block->nbo < R600_BLOCK_MAX_BO); + block->pm4_bo_index[j] = block->nbo; + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); + block->pm4[block->pm4_ndwords++] = 0x00000000; + block->reloc[block->nbo].bo_pm4_index[block->reloc[block->nbo].nreloc++] = block->pm4_ndwords - 1; + } + } + for (j = 0; j < n; j++) { + if (reg[i+j].flush_flags) { + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_SYNC, 3); + block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags; + block->pm4[block->pm4_ndwords++] = 0xFFFFFFFF; + block->pm4[block->pm4_ndwords++] = 0x00000000; + block->pm4[block->pm4_ndwords++] = 0x0000000A; + block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0); + block->pm4[block->pm4_ndwords++] = 0x00000000; + id = block->pm4_bo_index[j]; + block->reloc[id].bo_pm4_index[block->reloc[id].nreloc++] = block->pm4_ndwords - 1; + } + } + /* check that we stay in limit */ + assert(block->pm4_ndwords < R600_BLOCK_MAX_REG); + } + return 0; +} + +static int r600_group_init(struct r600_group *group, unsigned 
start_offset, unsigned end_offset) +{ + group->start_offset = start_offset; + group->end_offset = end_offset; + group->nblocks = 0; + group->blocks = NULL; + group->offset_block_id = calloc((end_offset - start_offset) >> 2, sizeof(unsigned)); + if (group->offset_block_id == NULL) + return -ENOMEM; + return 0; +} + +static void r600_group_fini(struct r600_group *group) +{ + free(group->offset_block_id); + free(group->blocks); +} + +/* R600/R700 configuration */ +static const struct r600_reg r600_reg_list[] = { + {0, 0, R_008C00_SQ_CONFIG}, + {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1}, + {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2}, + {0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT}, + {0, 0, R_008C10_SQ_STACK_RESOURCE_MGMT_1}, + {0, 0, R_008C14_SQ_STACK_RESOURCE_MGMT_2}, + {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ}, + {0, 0, R_009508_TA_CNTL_AUX}, + {0, 0, R_009714_VC_ENHANCE}, + {0, 0, R_009830_DB_DEBUG}, + {0, 0, R_009838_DB_WATERMARKS}, + {0, 0, R_028350_SX_MISC}, + {0, 0, R_0286C8_SPI_THREAD_GROUPING}, + {0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE}, + {0, 0, R_0288AC_SQ_GSVS_RING_ITEMSIZE}, + {0, 0, R_0288B0_SQ_ESTMP_RING_ITEMSIZE}, + {0, 0, R_0288B4_SQ_GSTMP_RING_ITEMSIZE}, + {0, 0, R_0288B8_SQ_VSTMP_RING_ITEMSIZE}, + {0, 0, R_0288BC_SQ_PSTMP_RING_ITEMSIZE}, + {0, 0, R_0288C0_SQ_FBUF_RING_ITEMSIZE}, + {0, 0, R_0288C4_SQ_REDUC_RING_ITEMSIZE}, + {0, 0, R_0288C8_SQ_GS_VERT_ITEMSIZE}, + {0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL}, + {0, 0, R_028A14_VGT_HOS_CNTL}, + {0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL}, + {0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL}, + {0, 0, R_028A20_VGT_HOS_REUSE_DEPTH}, + {0, 0, R_028A24_VGT_GROUP_PRIM_TYPE}, + {0, 0, R_028A28_VGT_GROUP_FIRST_DECR}, + {0, 0, R_028A2C_VGT_GROUP_DECR}, + {0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL}, + {0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL}, + {0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL}, + {0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL}, + {0, 0, R_028A40_VGT_GS_MODE}, + {0, 0, R_028A4C_PA_SC_MODE_CNTL}, + {0, 0, R_028AB0_VGT_STRMOUT_EN}, + {0, 0, 
R_028AB4_VGT_REUSE_OFF}, + {0, 0, R_028AB8_VGT_VTX_CNT_EN}, + {0, 0, R_028B20_VGT_STRMOUT_BUFFER_EN}, + {0, 0, R_028028_DB_STENCIL_CLEAR}, + {0, 0, R_02802C_DB_DEPTH_CLEAR}, + {1, 0, R_028040_CB_COLOR0_BASE}, + {0, 0, R_0280A0_CB_COLOR0_INFO}, + {0, 0, R_028060_CB_COLOR0_SIZE}, + {0, 0, R_028080_CB_COLOR0_VIEW}, + {1, 0, R_0280E0_CB_COLOR0_FRAG}, + {1, 0, R_0280C0_CB_COLOR0_TILE}, + {0, 0, R_028100_CB_COLOR0_MASK}, + {1, 0, R_028044_CB_COLOR1_BASE}, + {0, 0, R_0280A4_CB_COLOR1_INFO}, + {0, 0, R_028064_CB_COLOR1_SIZE}, + {0, 0, R_028084_CB_COLOR1_VIEW}, + {1, 0, R_0280E4_CB_COLOR1_FRAG}, + {1, 0, R_0280C4_CB_COLOR1_TILE}, + {0, 0, R_028104_CB_COLOR1_MASK}, + {1, 0, R_028048_CB_COLOR2_BASE}, + {0, 0, R_0280A8_CB_COLOR2_INFO}, + {0, 0, R_028068_CB_COLOR2_SIZE}, + {0, 0, R_028088_CB_COLOR2_VIEW}, + {1, 0, R_0280E8_CB_COLOR2_FRAG}, + {1, 0, R_0280C8_CB_COLOR2_TILE}, + {0, 0, R_028108_CB_COLOR2_MASK}, + {1, 0, R_02804C_CB_COLOR3_BASE}, + {0, 0, R_0280AC_CB_COLOR3_INFO}, + {0, 0, R_02806C_CB_COLOR3_SIZE}, + {0, 0, R_02808C_CB_COLOR3_VIEW}, + {1, 0, R_0280EC_CB_COLOR3_FRAG}, + {1, 0, R_0280CC_CB_COLOR3_TILE}, + {0, 0, R_02810C_CB_COLOR3_MASK}, + {1, 0, R_028050_CB_COLOR4_BASE}, + {0, 0, R_0280B0_CB_COLOR4_INFO}, + {0, 0, R_028070_CB_COLOR4_SIZE}, + {0, 0, R_028090_CB_COLOR4_VIEW}, + {1, 0, R_0280F0_CB_COLOR4_FRAG}, + {1, 0, R_0280D0_CB_COLOR4_TILE}, + {0, 0, R_028110_CB_COLOR4_MASK}, + {1, 0, R_028054_CB_COLOR5_BASE}, + {0, 0, R_0280B4_CB_COLOR5_INFO}, + {0, 0, R_028074_CB_COLOR5_SIZE}, + {0, 0, R_028094_CB_COLOR5_VIEW}, + {1, 0, R_0280F4_CB_COLOR5_FRAG}, + {1, 0, R_0280D4_CB_COLOR5_TILE}, + {0, 0, R_028114_CB_COLOR5_MASK}, + {1, 0, R_028058_CB_COLOR6_BASE}, + {0, 0, R_0280B8_CB_COLOR6_INFO}, + {0, 0, R_028078_CB_COLOR6_SIZE}, + {0, 0, R_028098_CB_COLOR6_VIEW}, + {1, 0, R_0280F8_CB_COLOR6_FRAG}, + {1, 0, R_0280D8_CB_COLOR6_TILE}, + {0, 0, R_028118_CB_COLOR6_MASK}, + {1, 0, R_02805C_CB_COLOR7_BASE}, + {0, 0, R_0280BC_CB_COLOR7_INFO}, + {0, 0, R_02807C_CB_COLOR7_SIZE}, + {0, 
0, R_02809C_CB_COLOR7_VIEW}, + {1, 0, R_0280FC_CB_COLOR7_FRAG}, + {1, 0, R_0280DC_CB_COLOR7_TILE}, + {0, 0, R_02811C_CB_COLOR7_MASK}, + {0, 0, R_028120_CB_CLEAR_RED}, + {0, 0, R_028124_CB_CLEAR_GREEN}, + {0, 0, R_028128_CB_CLEAR_BLUE}, + {0, 0, R_02812C_CB_CLEAR_ALPHA}, + {0, 0, R_02823C_CB_SHADER_MASK}, + {0, 0, R_028238_CB_TARGET_MASK}, + {0, 0, R_028410_SX_ALPHA_TEST_CONTROL}, + {0, 0, R_028414_CB_BLEND_RED}, + {0, 0, R_028418_CB_BLEND_GREEN}, + {0, 0, R_02841C_CB_BLEND_BLUE}, + {0, 0, R_028420_CB_BLEND_ALPHA}, + {0, 0, R_028424_CB_FOG_RED}, + {0, 0, R_028428_CB_FOG_GREEN}, + {0, 0, R_02842C_CB_FOG_BLUE}, + {0, 0, R_028430_DB_STENCILREFMASK}, + {0, 0, R_028434_DB_STENCILREFMASK_BF}, + {0, 0, R_028438_SX_ALPHA_REF}, + {0, 0, R_0286DC_SPI_FOG_CNTL}, + {0, 0, R_0286E0_SPI_FOG_FUNC_SCALE}, + {0, 0, R_0286E4_SPI_FOG_FUNC_BIAS}, + {0, 0, R_028780_CB_BLEND0_CONTROL}, + {0, 0, R_028784_CB_BLEND1_CONTROL}, + {0, 0, R_028788_CB_BLEND2_CONTROL}, + {0, 0, R_02878C_CB_BLEND3_CONTROL}, + {0, 0, R_028790_CB_BLEND4_CONTROL}, + {0, 0, R_028794_CB_BLEND5_CONTROL}, + {0, 0, R_028798_CB_BLEND6_CONTROL}, + {0, 0, R_02879C_CB_BLEND7_CONTROL}, + {0, 0, R_0287A0_CB_SHADER_CONTROL}, + {0, 0, R_028800_DB_DEPTH_CONTROL}, + {0, 0, R_028804_CB_BLEND_CONTROL}, + {0, 0, R_028808_CB_COLOR_CONTROL}, + {0, 0, R_02880C_DB_SHADER_CONTROL}, + {0, 0, R_028C04_PA_SC_AA_CONFIG}, + {0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX}, + {0, 0, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX}, + {0, 0, R_028C30_CB_CLRCMP_CONTROL}, + {0, 0, R_028C34_CB_CLRCMP_SRC}, + {0, 0, R_028C38_CB_CLRCMP_DST}, + {0, 0, R_028C3C_CB_CLRCMP_MSK}, + {0, 0, R_028C48_PA_SC_AA_MASK}, + {0, 0, R_028D2C_DB_SRESULTS_COMPARE_STATE1}, + {0, 0, R_028D44_DB_ALPHA_TO_MASK}, + {1, 0, R_02800C_DB_DEPTH_BASE}, + {0, 0, R_028000_DB_DEPTH_SIZE}, + {0, 0, R_028004_DB_DEPTH_VIEW}, + {0, 0, R_028010_DB_DEPTH_INFO}, + {0, 0, R_028D0C_DB_RENDER_CONTROL}, + {0, 0, R_028D10_DB_RENDER_OVERRIDE}, + {0, 0, R_028D24_DB_HTILE_SURFACE}, + {0, 0, 
R_028D30_DB_PRELOAD_CONTROL}, + {0, 0, R_028D34_DB_PREFETCH_LIMIT}, + {0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL}, + {0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR}, + {0, 0, R_028200_PA_SC_WINDOW_OFFSET}, + {0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL}, + {0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR}, + {0, 0, R_02820C_PA_SC_CLIPRECT_RULE}, + {0, 0, R_028210_PA_SC_CLIPRECT_0_TL}, + {0, 0, R_028214_PA_SC_CLIPRECT_0_BR}, + {0, 0, R_028218_PA_SC_CLIPRECT_1_TL}, + {0, 0, R_02821C_PA_SC_CLIPRECT_1_BR}, + {0, 0, R_028220_PA_SC_CLIPRECT_2_TL}, + {0, 0, R_028224_PA_SC_CLIPRECT_2_BR}, + {0, 0, R_028228_PA_SC_CLIPRECT_3_TL}, + {0, 0, R_02822C_PA_SC_CLIPRECT_3_BR}, + {0, 0, R_028230_PA_SC_EDGERULE}, + {0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL}, + {0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR}, + {0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL}, + {0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR}, + {0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0}, + {0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0}, + {0, 0, R_02843C_PA_CL_VPORT_XSCALE_0}, + {0, 0, R_028440_PA_CL_VPORT_XOFFSET_0}, + {0, 0, R_028444_PA_CL_VPORT_YSCALE_0}, + {0, 0, R_028448_PA_CL_VPORT_YOFFSET_0}, + {0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0}, + {0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0}, + {0, 0, R_0286D4_SPI_INTERP_CONTROL_0}, + {0, 0, R_028810_PA_CL_CLIP_CNTL}, + {0, 0, R_028814_PA_SU_SC_MODE_CNTL}, + {0, 0, R_028818_PA_CL_VTE_CNTL}, + {0, 0, R_02881C_PA_CL_VS_OUT_CNTL}, + {0, 0, R_028820_PA_CL_NANINF_CNTL}, + {0, 0, R_028A00_PA_SU_POINT_SIZE}, + {0, 0, R_028A04_PA_SU_POINT_MINMAX}, + {0, 0, R_028A08_PA_SU_LINE_CNTL}, + {0, 0, R_028A0C_PA_SC_LINE_STIPPLE}, + {0, 0, R_028A48_PA_SC_MPASS_PS_CNTL}, + {0, 0, R_028C00_PA_SC_LINE_CNTL}, + {0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ}, + {0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ}, + {0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ}, + {0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ}, + {0, 0, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL}, + {0, 0, R_028DFC_PA_SU_POLY_OFFSET_CLAMP}, + {0, 0, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE}, + {0, 0, 
R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET}, + {0, 0, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE}, + {0, 0, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET}, + {0, 0, R_028E20_PA_CL_UCP0_X}, + {0, 0, R_028E24_PA_CL_UCP0_Y}, + {0, 0, R_028E28_PA_CL_UCP0_Z}, + {0, 0, R_028E2C_PA_CL_UCP0_W}, + {0, 0, R_028E30_PA_CL_UCP1_X}, + {0, 0, R_028E34_PA_CL_UCP1_Y}, + {0, 0, R_028E38_PA_CL_UCP1_Z}, + {0, 0, R_028E3C_PA_CL_UCP1_W}, + {0, 0, R_028E40_PA_CL_UCP2_X}, + {0, 0, R_028E44_PA_CL_UCP2_Y}, + {0, 0, R_028E48_PA_CL_UCP2_Z}, + {0, 0, R_028E4C_PA_CL_UCP2_W}, + {0, 0, R_028E50_PA_CL_UCP3_X}, + {0, 0, R_028E54_PA_CL_UCP3_Y}, + {0, 0, R_028E58_PA_CL_UCP3_Z}, + {0, 0, R_028E5C_PA_CL_UCP3_W}, + {0, 0, R_028E60_PA_CL_UCP4_X}, + {0, 0, R_028E64_PA_CL_UCP4_Y}, + {0, 0, R_028E68_PA_CL_UCP4_Z}, + {0, 0, R_028E6C_PA_CL_UCP4_W}, + {0, 0, R_028E70_PA_CL_UCP5_X}, + {0, 0, R_028E74_PA_CL_UCP5_Y}, + {0, 0, R_028E78_PA_CL_UCP5_Z}, + {0, 0, R_028E7C_PA_CL_UCP5_W}, + {0, 0, R_028380_SQ_VTX_SEMANTIC_0}, + {0, 0, R_028384_SQ_VTX_SEMANTIC_1}, + {0, 0, R_028388_SQ_VTX_SEMANTIC_2}, + {0, 0, R_02838C_SQ_VTX_SEMANTIC_3}, + {0, 0, R_028390_SQ_VTX_SEMANTIC_4}, + {0, 0, R_028394_SQ_VTX_SEMANTIC_5}, + {0, 0, R_028398_SQ_VTX_SEMANTIC_6}, + {0, 0, R_02839C_SQ_VTX_SEMANTIC_7}, + {0, 0, R_0283A0_SQ_VTX_SEMANTIC_8}, + {0, 0, R_0283A4_SQ_VTX_SEMANTIC_9}, + {0, 0, R_0283A8_SQ_VTX_SEMANTIC_10}, + {0, 0, R_0283AC_SQ_VTX_SEMANTIC_11}, + {0, 0, R_0283B0_SQ_VTX_SEMANTIC_12}, + {0, 0, R_0283B4_SQ_VTX_SEMANTIC_13}, + {0, 0, R_0283B8_SQ_VTX_SEMANTIC_14}, + {0, 0, R_0283BC_SQ_VTX_SEMANTIC_15}, + {0, 0, R_0283C0_SQ_VTX_SEMANTIC_16}, + {0, 0, R_0283C4_SQ_VTX_SEMANTIC_17}, + {0, 0, R_0283C8_SQ_VTX_SEMANTIC_18}, + {0, 0, R_0283CC_SQ_VTX_SEMANTIC_19}, + {0, 0, R_0283D0_SQ_VTX_SEMANTIC_20}, + {0, 0, R_0283D4_SQ_VTX_SEMANTIC_21}, + {0, 0, R_0283D8_SQ_VTX_SEMANTIC_22}, + {0, 0, R_0283DC_SQ_VTX_SEMANTIC_23}, + {0, 0, R_0283E0_SQ_VTX_SEMANTIC_24}, + {0, 0, R_0283E4_SQ_VTX_SEMANTIC_25}, + {0, 0, R_0283E8_SQ_VTX_SEMANTIC_26}, + {0, 0, 
R_0283EC_SQ_VTX_SEMANTIC_27}, + {0, 0, R_0283F0_SQ_VTX_SEMANTIC_28}, + {0, 0, R_0283F4_SQ_VTX_SEMANTIC_29}, + {0, 0, R_0283F8_SQ_VTX_SEMANTIC_30}, + {0, 0, R_0283FC_SQ_VTX_SEMANTIC_31}, + {0, 0, R_028614_SPI_VS_OUT_ID_0}, + {0, 0, R_028618_SPI_VS_OUT_ID_1}, + {0, 0, R_02861C_SPI_VS_OUT_ID_2}, + {0, 0, R_028620_SPI_VS_OUT_ID_3}, + {0, 0, R_028624_SPI_VS_OUT_ID_4}, + {0, 0, R_028628_SPI_VS_OUT_ID_5}, + {0, 0, R_02862C_SPI_VS_OUT_ID_6}, + {0, 0, R_028630_SPI_VS_OUT_ID_7}, + {0, 0, R_028634_SPI_VS_OUT_ID_8}, + {0, 0, R_028638_SPI_VS_OUT_ID_9}, + {0, 0, R_0286C4_SPI_VS_OUT_CONFIG}, + {1, 0, R_028858_SQ_PGM_START_VS}, + {0, S_0085F0_SH_ACTION_ENA(1), R_028868_SQ_PGM_RESOURCES_VS}, + {1, 0, R_028894_SQ_PGM_START_FS}, + {0, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_RESOURCES_FS}, + {0, 0, R_0288D0_SQ_PGM_CF_OFFSET_VS}, + {0, 0, R_0288DC_SQ_PGM_CF_OFFSET_FS}, + {0, 0, R_028644_SPI_PS_INPUT_CNTL_0}, + {0, 0, R_028648_SPI_PS_INPUT_CNTL_1}, + {0, 0, R_02864C_SPI_PS_INPUT_CNTL_2}, + {0, 0, R_028650_SPI_PS_INPUT_CNTL_3}, + {0, 0, R_028654_SPI_PS_INPUT_CNTL_4}, + {0, 0, R_028658_SPI_PS_INPUT_CNTL_5}, + {0, 0, R_02865C_SPI_PS_INPUT_CNTL_6}, + {0, 0, R_028660_SPI_PS_INPUT_CNTL_7}, + {0, 0, R_028664_SPI_PS_INPUT_CNTL_8}, + {0, 0, R_028668_SPI_PS_INPUT_CNTL_9}, + {0, 0, R_02866C_SPI_PS_INPUT_CNTL_10}, + {0, 0, R_028670_SPI_PS_INPUT_CNTL_11}, + {0, 0, R_028674_SPI_PS_INPUT_CNTL_12}, + {0, 0, R_028678_SPI_PS_INPUT_CNTL_13}, + {0, 0, R_02867C_SPI_PS_INPUT_CNTL_14}, + {0, 0, R_028680_SPI_PS_INPUT_CNTL_15}, + {0, 0, R_028684_SPI_PS_INPUT_CNTL_16}, + {0, 0, R_028688_SPI_PS_INPUT_CNTL_17}, + {0, 0, R_02868C_SPI_PS_INPUT_CNTL_18}, + {0, 0, R_028690_SPI_PS_INPUT_CNTL_19}, + {0, 0, R_028694_SPI_PS_INPUT_CNTL_20}, + {0, 0, R_028698_SPI_PS_INPUT_CNTL_21}, + {0, 0, R_02869C_SPI_PS_INPUT_CNTL_22}, + {0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23}, + {0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24}, + {0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25}, + {0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26}, + {0, 0, 
R_0286B0_SPI_PS_INPUT_CNTL_27}, + {0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28}, + {0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29}, + {0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30}, + {0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31}, + {0, 0, R_0286CC_SPI_PS_IN_CONTROL_0}, + {0, 0, R_0286D0_SPI_PS_IN_CONTROL_1}, + {0, 0, R_0286D8_SPI_INPUT_Z}, + {1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS}, + {0, 0, R_028850_SQ_PGM_RESOURCES_PS}, + {0, 0, R_028854_SQ_PGM_EXPORTS_PS}, + {0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS}, + {0, 0, R_008958_VGT_PRIMITIVE_TYPE}, + {0, 0, R_028400_VGT_MAX_VTX_INDX}, + {0, 0, R_028404_VGT_MIN_VTX_INDX}, + {0, 0, R_028408_VGT_INDX_OFFSET}, + {0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX}, + {0, 0, R_028A84_VGT_PRIMITIVEID_EN}, + {0, 0, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN}, + {0, 0, R_028AA0_VGT_INSTANCE_STEP_RATE_0}, + {0, 0, R_028AA4_VGT_INSTANCE_STEP_RATE_1}, +}; + +/* SHADER CONSTANT R600/R700 */ +static int r600_state_constant_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_constant[] = { + {0, 0, R_030000_SQ_ALU_CONSTANT0_0}, + {0, 0, R_030004_SQ_ALU_CONSTANT1_0}, + {0, 0, R_030008_SQ_ALU_CONSTANT2_0}, + {0, 0, R_03000C_SQ_ALU_CONSTANT3_0}, + }; + unsigned nreg = sizeof(r600_shader_constant)/sizeof(struct r600_reg); + + for (int i = 0; i < nreg; i++) { + r600_shader_constant[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_constant, nreg); +} + +/* SHADER RESOURCE R600/R700 */ +static int r600_state_resource_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_resource[] = { + {0, 0, R_038000_RESOURCE0_WORD0}, + {0, 0, R_038004_RESOURCE0_WORD1}, + {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_038008_RESOURCE0_WORD2}, + {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_03800C_RESOURCE0_WORD3}, + {0, 0, R_038010_RESOURCE0_WORD4}, + {0, 0, R_038014_RESOURCE0_WORD5}, + {0, 0, R_038018_RESOURCE0_WORD6}, + }; + unsigned nreg = sizeof(r600_shader_resource)/sizeof(struct r600_reg); 
+ + for (int i = 0; i < nreg; i++) { + r600_shader_resource[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_resource, nreg); +} + +/* SHADER SAMPLER R600/R700 */ +static int r600_state_sampler_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_sampler[] = { + {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0}, + {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0}, + {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0}, + }; + unsigned nreg = sizeof(r600_shader_sampler)/sizeof(struct r600_reg); + + for (int i = 0; i < nreg; i++) { + r600_shader_sampler[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_sampler, nreg); +} + +/* SHADER SAMPLER BORDER R600/R700 */ +static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset) +{ + struct r600_reg r600_shader_sampler_border[] = { + {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED}, + {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN}, + {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE}, + {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA}, + }; + unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg); + + for (int i = 0; i < nreg; i++) { + r600_shader_sampler_border[i].offset += offset; + } + return r600_context_add_block(ctx, r600_shader_sampler_border, nreg); +} + +/* initialize */ +void r600_context_fini(struct r600_context *ctx) +{ + for (int i = 0; i < ctx->ngroups; i++) { + r600_group_fini(&ctx->groups[i]); + } + free(ctx->reloc); + free(ctx->pm4); + memset(ctx, 0, sizeof(struct r600_context)); +} + +int r600_context_init(struct r600_context *ctx, struct radeon *radeon) +{ + int r; + + memset(ctx, 0, sizeof(struct r600_context)); + ctx->radeon = radeon; + /* initialize groups */ + r = r600_group_init(&ctx->groups[R600_GROUP_CONFIG], R600_CONFIG_REG_OFFSET, R600_CONFIG_REG_END); + if (r) { + goto out_err; + } + r = r600_group_init(&ctx->groups[R600_GROUP_CTL_CONST], R600_CTL_CONST_OFFSET, R600_CTL_CONST_END); + if (r) { + goto out_err; + } + r = 
r600_group_init(&ctx->groups[R600_GROUP_LOOP_CONST], R600_LOOP_CONST_OFFSET, R600_LOOP_CONST_END); + if (r) { + goto out_err; + } + r = r600_group_init(&ctx->groups[R600_GROUP_BOOL_CONST], R600_BOOL_CONST_OFFSET, R600_BOOL_CONST_END); + if (r) { + goto out_err; + } + r = r600_group_init(&ctx->groups[R600_GROUP_SAMPLER], R600_SAMPLER_OFFSET, R600_SAMPLER_END); + if (r) { + goto out_err; + } + r = r600_group_init(&ctx->groups[R600_GROUP_RESOURCE], R600_RESOURCE_OFFSET, R600_RESOURCE_END); + if (r) { + goto out_err; + } + r = r600_group_init(&ctx->groups[R600_GROUP_ALU_CONST], R600_ALU_CONST_OFFSET, R600_ALU_CONST_END); + if (r) { + goto out_err; + } + r = r600_group_init(&ctx->groups[R600_GROUP_CONTEXT], R600_CONTEXT_REG_OFFSET, R600_CONTEXT_REG_END); + if (r) { + goto out_err; + } + ctx->ngroups = R600_NGROUPS; + + /* add blocks */ + r = r600_context_add_block(ctx, r600_reg_list, sizeof(r600_reg_list)/sizeof(struct r600_reg)); + if (r) + goto out_err; + + /* PS SAMPLER BORDER */ + for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) { + r = r600_state_sampler_border_init(ctx, offset); + if (r) + goto out_err; + } + + /* VS SAMPLER BORDER */ + for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) { + r = r600_state_sampler_border_init(ctx, offset); + if (r) + goto out_err; + } + /* PS SAMPLER */ + for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) { + r = r600_state_sampler_init(ctx, offset); + if (r) + goto out_err; + } + /* VS SAMPLER */ + for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) { + r = r600_state_sampler_init(ctx, offset); + if (r) + goto out_err; + } + /* PS RESOURCE */ + for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) { + r = r600_state_resource_init(ctx, offset); + if (r) + goto out_err; + } + /* VS RESOURCE */ + for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) { + r = r600_state_resource_init(ctx, offset); + if (r) + goto out_err; + } + /* PS CONSTANT */ + for (int j = 0, offset = 0; j < 256; j++, 
offset += 0x10) { + r = r600_state_constant_init(ctx, offset); + if (r) + goto out_err; + } + /* VS CONSTANT */ + for (int j = 0, offset = 0x1000; j < 256; j++, offset += 0x10) { + r = r600_state_constant_init(ctx, offset); + if (r) + goto out_err; + } + + /* allocate cs variables */ + ctx->nreloc = RADEON_CTX_MAX_PM4; + ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc)); + if (ctx->reloc == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx->bo = calloc(ctx->nreloc, sizeof(void *)); + if (ctx->bo == NULL) { + r = -ENOMEM; + goto out_err; + } + ctx->pm4_ndwords = RADEON_CTX_MAX_PM4; + ctx->pm4 = calloc(ctx->pm4_ndwords, 4); + if (ctx->pm4 == NULL) { + r = -ENOMEM; + goto out_err; + } + return 0; +out_err: + r600_context_fini(ctx); + return r; +} + +static void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_ws_bo *bo) +{ + int i, reloc_id; + unsigned handle = radeon_ws_bo_get_handle(bo); + + assert(bo != NULL); + for (i = 0, reloc_id = -1; i < ctx->creloc; i++) { + if (ctx->reloc[i].handle == handle) { + reloc_id = i * sizeof(struct r600_reloc) / 4; + /* set PKT3 to point to proper reloc */ + *pm4 = reloc_id; + } + } + if (reloc_id == -1) { + /* add new relocation */ + if (ctx->creloc >= ctx->nreloc) { + r600_context_flush(ctx); + } + reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4; + ctx->reloc[ctx->creloc].handle = handle; + ctx->reloc[ctx->creloc].read_domain = RADEON_GEM_DOMAIN_GTT; + ctx->reloc[ctx->creloc].write_domain = RADEON_GEM_DOMAIN_GTT; + ctx->reloc[ctx->creloc].flags = 0; + radeon_ws_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo); + ctx->creloc++; + /* set PKT3 to point to proper reloc */ + *pm4 = reloc_id; + } +} + +void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state) +{ + struct r600_group *group; + struct r600_group_block *block; + + for (int i = 0; i < state->nregs; i++) { + unsigned id; + group = &ctx->groups[state->regs[i].group_id]; + id = 
group->offset_block_id[(state->regs[i].offset - group->start_offset) >> 2]; + block = &group->blocks[id]; + id = (state->regs[i].offset - block->start_offset) >> 2; + block->pm4[id] &= ~state->regs[i].mask; + block->pm4[id] |= state->regs[i].value; + if (block->pm4_bo_index[id]) { + /* find relocation */ + id = block->pm4_bo_index[id]; + radeon_ws_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo); + for (int j = 0; j < block->reloc[id].nreloc; j++) { + r600_context_bo_reloc(ctx, &block->pm4[block->reloc[id].bo_pm4_index[j]], + block->reloc[id].bo); + } + } + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; + } +} + +static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_group_block *block; + unsigned id; + + offset -= ctx->groups[R600_GROUP_RESOURCE].start_offset; + id = ctx->groups[R600_GROUP_RESOURCE].offset_block_id[offset >> 2]; + block = &ctx->groups[R600_GROUP_RESOURCE].blocks[id]; + block->pm4[0] = state->regs[0].value; + block->pm4[1] = state->regs[1].value; + block->pm4[2] = state->regs[2].value; + block->pm4[3] = state->regs[3].value; + block->pm4[4] = state->regs[4].value; + block->pm4[5] = state->regs[5].value; + block->pm4[6] = state->regs[6].value; + radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, block->reloc[1].bo); + radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, block->reloc[2].bo); + if (state->regs[0].bo) { + /* VERTEX RESOURCE, we preted there is 2 bo to relocate so + * we have single case btw VERTEX & TEXTURE resource + */ + radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo); + radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo); + } else { + /* TEXTURE RESOURCE */ + radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo); + radeon_ws_bo_reference(ctx->radeon, 
&block->reloc[2].bo, state->regs[3].bo); + } + r600_context_bo_reloc(ctx, &block->pm4[block->reloc[1].bo_pm4_index[0]], block->reloc[1].bo); + r600_context_bo_reloc(ctx, &block->pm4[block->reloc[2].bo_pm4_index[0]], block->reloc[2].bo); + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; +} + +void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid; + + r600_context_pipe_state_set_resource(ctx, state, offset); +} + +void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid) +{ + unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid; + + r600_context_pipe_state_set_resource(ctx, state, offset); +} + +static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_group_block *block; + unsigned id; + + offset -= ctx->groups[R600_GROUP_SAMPLER].start_offset; + id = ctx->groups[R600_GROUP_SAMPLER].offset_block_id[offset >> 2]; + block = &ctx->groups[R600_GROUP_SAMPLER].blocks[id]; + block->pm4[0] = state->regs[0].value; + block->pm4[1] = state->regs[1].value; + block->pm4[2] = state->regs[2].value; + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; +} + +static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset) +{ + struct r600_group_block *block; + unsigned id; + + offset -= ctx->groups[R600_GROUP_CONFIG].start_offset; + id = ctx->groups[R600_GROUP_CONFIG].offset_block_id[offset >> 2]; + block = &ctx->groups[R600_GROUP_CONFIG].blocks[id]; + block->pm4[0] = state->regs[3].value; + block->pm4[1] = state->regs[4].value; + block->pm4[2] 
= state->regs[5].value; + block->pm4[3] = state->regs[6].value; + block->status |= R600_BLOCK_STATUS_ENABLED; + block->status |= R600_BLOCK_STATUS_DIRTY; + ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; +} + +void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) +{ + unsigned offset; + + offset = 0x0003C000 + id * 0xc; + r600_context_pipe_state_set_sampler(ctx, state, offset); + if (state->nregs > 3) { + offset = 0x0000A400 + id * 0x10; + r600_context_pipe_state_set_sampler_border(ctx, state, offset); + } +} + +void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id) +{ + unsigned offset; + + offset = 0x0003C0D8 + id * 0xc; + r600_context_pipe_state_set_sampler(ctx, state, offset); + if (state->nregs > 3) { + offset = 0x0000A600 + id * 0x10; + r600_context_pipe_state_set_sampler_border(ctx, state, offset); + } +} + +static inline void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group, unsigned opcode) +{ + for (int i = 0; i < group->nblocks; i++) { + struct r600_group_block *block = &group->blocks[i]; + if (block->status & R600_BLOCK_STATUS_DIRTY) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(opcode, block->nreg); + ctx->pm4[ctx->pm4_cdwords++] = (block->start_offset - group->start_offset) >> 2; + memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4); + ctx->pm4_cdwords += block->pm4_ndwords; + block->status ^= R600_BLOCK_STATUS_DIRTY; + } + } +} + +void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw) +{ + unsigned ndwords = 9; + + if (draw->indices) { + ndwords = 13; + /* make sure there is enough relocation space before scheduling draw */ + if (ctx->creloc >= (ctx->nreloc - 1)) { + r600_context_flush(ctx); + } + } + if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) { + /* need to flush */ + r600_context_flush(ctx); + } + /* at that point everythings 
is flushed and ctx->pm4_cdwords = 0 */ + if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) { + R600_ERR("context is too big to be scheduled\n"); + return; + } + /* Ok we enough room to copy packet */ + r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG], PKT3_SET_CONFIG_REG); + r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG); + r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST], PKT3_SET_ALU_CONST); + r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER], PKT3_SET_SAMPLER); + r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE], PKT3_SET_RESOURCE); + /* draw packet */ + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances; + if (draw->indices) { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3); + ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset; + ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0); + ctx->pm4[ctx->pm4_cdwords++] = 0; + r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], draw->indices); + } else { + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1); + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices; + ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator; + } + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0); + ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT; +} + +void r600_context_flush(struct r600_context *ctx) +{ + struct drm_radeon_cs drmib; + struct drm_radeon_cs_chunk chunks[2]; + uint64_t chunk_array[2]; + struct r600_group_block *block; + int r; + + if (!ctx->pm4_cdwords) + return; + +#if 1 + /* emit cs */ + drmib.num_chunks = 2; + 
drmib.chunks = (uint64_t)(uintptr_t)chunk_array; + chunks[0].chunk_id = RADEON_CHUNK_ID_IB; + chunks[0].length_dw = ctx->pm4_cdwords; + chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4; + chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS; + chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4; + chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc; + chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0]; + chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1]; + r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib, + sizeof(struct drm_radeon_cs)); +#endif + /* restart */ + for (int i = 0; i < ctx->creloc; i++) { + radeon_ws_bo_reference(ctx->radeon, &ctx->bo[i], NULL); + } + ctx->creloc = 0; + ctx->pm4_dirty_cdwords = 0; + ctx->pm4_cdwords = 0; + for (int i = 0; i < ctx->ngroups; i++) { + for (int j = 0; j < ctx->groups[i].nblocks; j++) { + /* mark enabled block as dirty */ + block = &ctx->groups[i].blocks[j]; + if (block->status & R600_BLOCK_STATUS_ENABLED) { + ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords; + block->status |= R600_BLOCK_STATUS_DIRTY; + for (int k = 1; k <= block->nbo; k++) { + for (int l = 0; l < block->reloc[k].nreloc; l++) { + r600_context_bo_reloc(ctx, + &block->pm4[block->reloc[k].bo_pm4_index[l]], + block->reloc[k].bo); + } + } + } + } + } +} + +void r600_context_dump_bof(struct r600_context *ctx, const char *file) +{ + bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root; + unsigned i; + + root = device_id = bcs = blob = array = bo = size = handle = NULL; + root = bof_object(); + if (root == NULL) + goto out_err; + device_id = bof_int32(ctx->radeon->device); + if (device_id == NULL) + return; + if (bof_object_set(root, "device_id", device_id)) + goto out_err; + bof_decref(device_id); + device_id = NULL; + /* dump relocs */ + blob = bof_blob(ctx->creloc * 16, ctx->reloc); + if (blob == NULL) + goto out_err; + if (bof_object_set(root, "reloc", blob)) + goto out_err; + bof_decref(blob); + blob = NULL; + /* dump cs 
*/ + blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4); + if (blob == NULL) + goto out_err; + if (bof_object_set(root, "pm4", blob)) + goto out_err; + bof_decref(blob); + blob = NULL; + /* dump bo */ + array = bof_array(); + if (array == NULL) + goto out_err; + for (i = 0; i < ctx->creloc; i++) { + struct radeon_bo *rbo = radeon_bo_pb_get_bo(ctx->bo[i]->pb); + bo = bof_object(); + if (bo == NULL) + goto out_err; + size = bof_int32(rbo->size); + if (size == NULL) + goto out_err; + if (bof_object_set(bo, "size", size)) + goto out_err; + bof_decref(size); + size = NULL; + handle = bof_int32(rbo->handle); + if (handle == NULL) + goto out_err; + if (bof_object_set(bo, "handle", handle)) + goto out_err; + bof_decref(handle); + handle = NULL; + radeon_bo_map(ctx->radeon, rbo); + blob = bof_blob(rbo->size, rbo->data); + radeon_bo_unmap(ctx->radeon, rbo); + if (blob == NULL) + goto out_err; + if (bof_object_set(bo, "data", blob)) + goto out_err; + bof_decref(blob); + blob = NULL; + if (bof_array_append(array, bo)) + goto out_err; + bof_decref(bo); + bo = NULL; + } + if (bof_object_set(root, "bo", array)) + goto out_err; + bof_dump_file(root, file); +out_err: + bof_decref(blob); + bof_decref(array); + bof_decref(bo); + bof_decref(size); + bof_decref(handle); + bof_decref(device_id); + bof_decref(root); +} diff --git a/src/gallium/winsys/r600/drm/r600d.h b/src/gallium/winsys/r600/drm/r600d.h index 05f31571f42..c5d5fe9ddfc 100644 --- a/src/gallium/winsys/r600/drm/r600d.h +++ b/src/gallium/winsys/r600/drm/r600d.h @@ -26,6 +26,15 @@ #ifndef R600D_H #define R600D_H +/* evergreen values */ +#define EG_RESOURCE_OFFSET 0x00030000 +#define EG_RESOURCE_END 0x00030400 +#define EG_LOOP_CONST_OFFSET 0x0003A200 +#define EG_LOOP_CONST_END 0x0003A26C +#define EG_BOOL_CONST_OFFSET 0x0003A500 +#define EG_BOOL_CONST_END 0x0003A506 + + #define R600_CONFIG_REG_OFFSET 0X00008000 #define R600_CONFIG_REG_END 0X0000AC00 #define R600_CONTEXT_REG_OFFSET 0X00028000 @@ -43,15 +52,6 @@ #define 
R600_BOOL_CONST_OFFSET 0X0003E380 #define R600_BOOL_CONST_END 0X00040000 -/* evergreen values */ -#define EG_RESOURCE_OFFSET 0x00030000 -#define EG_RESOURCE_END 0x00030400 -#define EG_LOOP_CONST_OFFSET 0x0003A200 -#define EG_LOOP_CONST_END 0x0003A26C -#define EG_BOOL_CONST_OFFSET 0x0003A500 -#define EG_BOOL_CONST_END 0x0003A506 - - #define PKT3_NOP 0x10 #define PKT3_INDIRECT_BUFFER_END 0x17 #define PKT3_SET_PREDICATION 0x20 @@ -939,6 +939,13 @@ #define S_028080_SLICE_MAX(x) (((x) & 0x7FF) << 13) #define G_028080_SLICE_MAX(x) (((x) >> 13) & 0x7FF) #define C_028080_SLICE_MAX 0xFF001FFF +#define R_028084_CB_COLOR1_VIEW 0x028084 +#define R_028088_CB_COLOR2_VIEW 0x028088 +#define R_02808C_CB_COLOR3_VIEW 0x02808C +#define R_028090_CB_COLOR4_VIEW 0x028090 +#define R_028094_CB_COLOR5_VIEW 0x028094 +#define R_028098_CB_COLOR6_VIEW 0x028098 +#define R_02809C_CB_COLOR7_VIEW 0x02809C #define R_028100_CB_COLOR0_MASK 0x028100 #define S_028100_CMASK_BLOCK_MAX(x) (((x) & 0xFFF) << 0) #define G_028100_CMASK_BLOCK_MAX(x) (((x) >> 0) & 0xFFF) @@ -946,6 +953,13 @@ #define S_028100_FMASK_TILE_MAX(x) (((x) & 0xFFFFF) << 12) #define G_028100_FMASK_TILE_MAX(x) (((x) >> 12) & 0xFFFFF) #define C_028100_FMASK_TILE_MAX 0x00000FFF +#define R_028104_CB_COLOR1_MASK 0x028104 +#define R_028108_CB_COLOR2_MASK 0x028108 +#define R_02810C_CB_COLOR3_MASK 0x02810C +#define R_028110_CB_COLOR4_MASK 0x028110 +#define R_028114_CB_COLOR5_MASK 0x028114 +#define R_028118_CB_COLOR6_MASK 0x028118 +#define R_02811C_CB_COLOR7_MASK 0x02811C #define R_028040_CB_COLOR0_BASE 0x028040 #define S_028040_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) #define G_028040_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) @@ -954,10 +968,24 @@ #define S_0280E0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) #define G_0280E0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) #define C_0280E0_BASE_256B 0x00000000 +#define R_0280E4_CB_COLOR1_FRAG 0x0280E4 +#define R_0280E8_CB_COLOR2_FRAG 0x0280E8 +#define R_0280EC_CB_COLOR3_FRAG 0x0280EC +#define R_0280F0_CB_COLOR4_FRAG 
0x0280F0 +#define R_0280F4_CB_COLOR5_FRAG 0x0280F4 +#define R_0280F8_CB_COLOR6_FRAG 0x0280F8 +#define R_0280FC_CB_COLOR7_FRAG 0x0280FC #define R_0280C0_CB_COLOR0_TILE 0x0280C0 #define S_0280C0_BASE_256B(x) (((x) & 0xFFFFFFFF) << 0) #define G_0280C0_BASE_256B(x) (((x) >> 0) & 0xFFFFFFFF) #define C_0280C0_BASE_256B 0x00000000 +#define R_0280C4_CB_COLOR1_TILE 0x0280C4 +#define R_0280C8_CB_COLOR2_TILE 0x0280C8 +#define R_0280CC_CB_COLOR3_TILE 0x0280CC +#define R_0280D0_CB_COLOR4_TILE 0x0280D0 +#define R_0280D4_CB_COLOR5_TILE 0x0280D4 +#define R_0280D8_CB_COLOR6_TILE 0x0280D8 +#define R_0280DC_CB_COLOR7_TILE 0x0280DC #define R_028808_CB_COLOR_CONTROL 0x028808 #define S_028808_FOG_ENABLE(x) (((x) & 0x1) << 0) #define G_028808_FOG_ENABLE(x) (((x) >> 0) & 0x1) @@ -2130,5 +2158,36 @@ #define C_03000C_W 0x00000000 #define R_0287E4_VGT_DMA_BASE_HI 0x0287E4 #define R_0287E8_VGT_DMA_BASE 0x0287E8 +#define R_028E20_PA_CL_UCP0_X 0x028E20 +#define R_028E24_PA_CL_UCP0_Y 0x028E24 +#define R_028E28_PA_CL_UCP0_Z 0x028E28 +#define R_028E2C_PA_CL_UCP0_W 0x028E2C +#define R_028E30_PA_CL_UCP1_X 0x028E30 +#define R_028E34_PA_CL_UCP1_Y 0x028E34 +#define R_028E38_PA_CL_UCP1_Z 0x028E38 +#define R_028E3C_PA_CL_UCP1_W 0x028E3C +#define R_028E40_PA_CL_UCP2_X 0x028E40 +#define R_028E44_PA_CL_UCP2_Y 0x028E44 +#define R_028E48_PA_CL_UCP2_Z 0x028E48 +#define R_028E4C_PA_CL_UCP2_W 0x028E4C +#define R_028E50_PA_CL_UCP3_X 0x028E50 +#define R_028E54_PA_CL_UCP3_Y 0x028E54 +#define R_028E58_PA_CL_UCP3_Z 0x028E58 +#define R_028E5C_PA_CL_UCP3_W 0x028E5C +#define R_028E60_PA_CL_UCP4_X 0x028E60 +#define R_028E64_PA_CL_UCP4_Y 0x028E64 +#define R_028E68_PA_CL_UCP4_Z 0x028E68 +#define R_028E6C_PA_CL_UCP4_W 0x028E6C +#define R_028E70_PA_CL_UCP5_X 0x028E70 +#define R_028E74_PA_CL_UCP5_Y 0x028E74 +#define R_028E78_PA_CL_UCP5_Z 0x028E78 +#define R_028E7C_PA_CL_UCP5_W 0x028E7C +#define R_038000_RESOURCE0_WORD0 0x038000 +#define R_038004_RESOURCE0_WORD1 0x038004 +#define R_038008_RESOURCE0_WORD2 0x038008 +#define 
R_03800C_RESOURCE0_WORD3 0x03800C +#define R_038010_RESOURCE0_WORD4 0x038010 +#define R_038014_RESOURCE0_WORD5 0x038014 +#define R_038018_RESOURCE0_WORD6 0x038018 #endif |